summaryrefslogtreecommitdiff
path: root/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
blob: 3d2f9bb0d765b894e1d7f5a17fc964d2e914f9cc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 7 Dec 2023 09:17:27 +0800
Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose
 abi.mode_clobber(V4DImode) deosn't contains all SSE_REGS.

If the function desn't clobber any sse registers or only clobber
128-bit part, then vzeroupper isn't issued before the function exit.
the status not CLEAN but ANY after the function.

Also for sibling_call, it's safe to issue an vzeroupper. Also there
could be missing vzeroupper since there's no mode_exit for
sibling_call_p.

gcc/ChangeLog:

	PR target/112891
	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
	align with ix86_avx_u128_mode_needed.
	(ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for
	sibling_call.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr112891.c: New test.
	* gcc.target/i386/pr112891-2.c: New test.

(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
---
 gcc/config/i386/i386.cc                    | 22 +++++++++++++---
 gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++
 3 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e75d37023..60f3296b0 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
 	 modes wider than 256 bits.  It's only safe to issue a
 	 vzeroupper if all SSE registers are clobbered.  */
       const function_abi &abi = insn_callee_abi (insn);
-      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
-				  abi.mode_clobbers (V4DImode)))
+      /* Should be safe to issue an vzeroupper before sibling_call_p.
+	 Also there not mode_exit for sibling_call, so there could be
+	 missing vzeroupper for that.  */
+      if (!(SIBLING_CALL_P (insn)
+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+				      abi.mode_clobbers (V4DImode))))
 	return AVX_U128_ANY;
 
       return AVX_U128_CLEAN;
@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
       bool avx_upper_reg_found = false;
       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
 
-      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
+      if (avx_upper_reg_found)
+	return AVX_U128_DIRTY;
+
+      /* If the function desn't clobber any sse registers or only clobber
+	 128-bit part, Then vzeroupper isn't issued before the function exit.
+	 the status not CLEAN but ANY after the function.  */
+      const function_abi &abi = insn_callee_abi (insn);
+      if (!(SIBLING_CALL_P (insn)
+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+				      abi.mode_clobbers (V4DImode))))
+	return AVX_U128_ANY;
+
+      return  AVX_U128_CLEAN;
     }
 
   /* Otherwise, return current mode.  Remember that if insn
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
new file mode 100644
index 000000000..164c3985d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+double
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+  return a[5] + b[5];
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  double c = foo (a, b);
+  return __builtin_exp (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
new file mode 100644
index 000000000..dbf6c6794
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+void
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  foo (a, b);
+  return __builtin_exp (b[1]);
+}
-- 
2.31.1