summaryrefslogtreecommitdiff
path: root/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
diff options
context:
space:
mode:
Diffstat (limited to '0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch')
-rw-r--r-- 0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch 151
1 files changed, 151 insertions, 0 deletions
diff --git a/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch b/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
new file mode 100644
index 0000000..3d2f9bb
--- /dev/null
+++ b/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
@@ -0,0 +1,151 @@
+From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 7 Dec 2023 09:17:27 +0800
+Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose
+ abi.mode_clobbers(V4DImode) doesn't contain all SSE_REGS.
+
+If the function doesn't clobber any SSE registers, or only clobbers
+their 128-bit part, then vzeroupper isn't issued before the function
+exit, so the status after the call is not CLEAN but ANY.
+
+Also for sibling_call, it's safe to issue a vzeroupper. Also there
+could be a missing vzeroupper since there's no mode_exit for
+sibling_call_p.
+
+gcc/ChangeLog:
+
+ PR target/112891
+ * config/i386/i386.cc (ix86_avx_u128_mode_after): Return
+ AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
+ align with ix86_avx_u128_mode_needed.
+ (ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
+ sibling_call.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/pr112891.c: New test.
+ * gcc.target/i386/pr112891-2.c: New test.
+
+(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
+---
+ gcc/config/i386/i386.cc | 22 +++++++++++++---
+ gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
+ gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++
+ 3 files changed, 78 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
+
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index e75d37023..60f3296b0 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
+ modes wider than 256 bits. It's only safe to issue a
+ vzeroupper if all SSE registers are clobbered. */
+ const function_abi &abi = insn_callee_abi (insn);
+- if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+- abi.mode_clobbers (V4DImode)))
++ /* Should be safe to issue a vzeroupper before sibling_call_p.
++ Also there is no mode_exit for sibling_call, so there could be
++ a missing vzeroupper for that. */
++ if (!(SIBLING_CALL_P (insn)
++ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++ abi.mode_clobbers (V4DImode))))
+ return AVX_U128_ANY;
+
+ return AVX_U128_CLEAN;
+@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
+ bool avx_upper_reg_found = false;
+ note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
+
+- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
++ if (avx_upper_reg_found)
++ return AVX_U128_DIRTY;
++
++ /* If the function doesn't clobber any SSE registers, or only clobbers
++ their 128-bit part, then vzeroupper isn't issued before the function
++ exit, so the status after the call is not CLEAN but ANY. */
++ const function_abi &abi = insn_callee_abi (insn);
++ if (!(SIBLING_CALL_P (insn)
++ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++ abi.mode_clobbers (V4DImode))))
++ return AVX_U128_ANY;
++
++ return AVX_U128_CLEAN;
+ }
+
+ /* Otherwise, return current mode. Remember that if insn
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+new file mode 100644
+index 000000000..164c3985d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++ a[0] = 1.0;
++ a[1] = 2.0;
++}
++
++double
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++ a[0] += b[0];
++ a[1] += b[1];
++ a[2] += b[2];
++ a[3] += b[3];
++ bar (b);
++ return a[5] + b[5];
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++ double c = foo (a, b);
++ return __builtin_exp (c);
++}
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
+new file mode 100644
+index 000000000..dbf6c6794
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++ a[0] = 1.0;
++ a[1] = 2.0;
++}
++
++void
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++ a[0] += b[0];
++ a[1] += b[1];
++ a[2] += b[2];
++ a[3] += b[3];
++ bar (b);
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++ foo (a, b);
++ return __builtin_exp (b[1]);
++}
+--
+2.31.1
+