diff options
Diffstat (limited to '0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch')
-rw-r--r-- | 0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch b/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch new file mode 100644 index 0000000..3d2f9bb --- /dev/null +++ b/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch @@ -0,0 +1,151 @@ +From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 7 Dec 2023 09:17:27 +0800 +Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose + abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS. + +If the function doesn't clobber any sse registers or only clobbers the +128-bit part, then vzeroupper isn't issued before the function exit. +The status is not CLEAN but ANY after the function. + +Also for sibling_call, it's safe to issue a vzeroupper. Also there +could be a missing vzeroupper since there's no mode_exit for +sibling_call_p. + +gcc/ChangeLog: + + PR target/112891 + * config/i386/i386.cc (ix86_avx_u128_mode_after): Return + AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to + align with ix86_avx_u128_mode_needed. + (ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for + sibling_call. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr112891.c: New test. + * gcc.target/i386/pr112891-2.c: New test. 
+ +(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642) +--- + gcc/config/i386/i386.cc | 22 +++++++++++++--- + gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++ + 3 files changed, 78 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index e75d37023..60f3296b0 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) + modes wider than 256 bits. It's only safe to issue a + vzeroupper if all SSE registers are clobbered. */ + const function_abi &abi = insn_callee_abi (insn); +- if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS], +- abi.mode_clobbers (V4DImode))) ++ /* Should be safe to issue an vzeroupper before sibling_call_p. ++ Also there not mode_exit for sibling_call, so there could be ++ missing vzeroupper for that. */ ++ if (!(SIBLING_CALL_P (insn) ++ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], ++ abi.mode_clobbers (V4DImode)))) + return AVX_U128_ANY; + + return AVX_U128_CLEAN; +@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) + bool avx_upper_reg_found = false; + note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); + +- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; ++ if (avx_upper_reg_found) ++ return AVX_U128_DIRTY; ++ ++ /* If the function desn't clobber any sse registers or only clobber ++ 128-bit part, Then vzeroupper isn't issued before the function exit. ++ the status not CLEAN but ANY after the function. 
*/ ++ const function_abi &abi = insn_callee_abi (insn); ++ if (!(SIBLING_CALL_P (insn) ++ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], ++ abi.mode_clobbers (V4DImode)))) ++ return AVX_U128_ANY; ++ ++ return AVX_U128_CLEAN; + } + + /* Otherwise, return current mode. Remember that if insn +diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c +new file mode 100644 +index 000000000..164c3985d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx2 -O3" } */ ++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ ++ ++void ++__attribute__((noinline)) ++bar (double* a) ++{ ++ a[0] = 1.0; ++ a[1] = 2.0; ++} ++ ++double ++__attribute__((noinline)) ++foo (double* __restrict a, double* b) ++{ ++ a[0] += b[0]; ++ a[1] += b[1]; ++ a[2] += b[2]; ++ a[3] += b[3]; ++ bar (b); ++ return a[5] + b[5]; ++} ++ ++double ++foo1 (double* __restrict a, double* b) ++{ ++ double c = foo (a, b); ++ return __builtin_exp (c); ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c +new file mode 100644 +index 000000000..dbf6c6794 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr112891.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx2 -O3" } */ ++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ ++ ++void ++__attribute__((noinline)) ++bar (double* a) ++{ ++ a[0] = 1.0; ++ a[1] = 2.0; ++} ++ ++void ++__attribute__((noinline)) ++foo (double* __restrict a, double* b) ++{ ++ a[0] += b[0]; ++ a[1] += b[1]; ++ a[2] += b[2]; ++ a[3] += b[3]; ++ bar (b); ++} ++ ++double ++foo1 (double* __restrict a, double* b) ++{ ++ foo (a, b); ++ return __builtin_exp (b[1]); ++} +-- +2.31.1 + |