summaryrefslogtreecommitdiff
path: root/0263-Software-mitigation-Disable-gather-generation-in-vec.patch
blob: 2a1e4e7928d606a9096cae0329d568cee39fccd2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 10 Aug 2023 11:41:39 +0800
Subject: [PATCH 09/28] Software mitigation: Disable gather generation in
 vectorization for GDS affected Intel Processors.

For more details of GDS (Gather Data Sampling), refer to
https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html

After the microcode update, gather instructions show a performance
regression.  To avoid that, this patch disables gather generation in
autovectorization and uses scalar emulation of gathers instead.

gcc/ChangeLog:

	* config/i386/i386-options.cc (m_GDS): New macro.
	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't
	enable for m_GDS.
	(X86_TUNE_USE_GATHER_4PARTS): Ditto.
	(X86_TUNE_USE_GATHER): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx2-gather-2.c: Adjust options to keep
	gather vectorization.
	* gcc.target/i386/avx2-gather-6.c: Ditto.
	* gcc.target/i386/avx512f-pr88464-1.c: Ditto.
	* gcc.target/i386/avx512f-pr88464-5.c: Ditto.
	* gcc.target/i386/avx512vl-pr88464-1.c: Ditto.
	* gcc.target/i386/avx512vl-pr88464-11.c: Ditto.
	* gcc.target/i386/avx512vl-pr88464-3.c: Ditto.
	* gcc.target/i386/avx512vl-pr88464-9.c: Ditto.
	* gcc.target/i386/pr88531-1b.c: Ditto.
	* gcc.target/i386/pr88531-1c.c: Ditto.

(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d)
---
 gcc/config/i386/i386-options.cc                     | 5 +++++
 gcc/config/i386/x86-tune.def                        | 9 ++++++---
 gcc/testsuite/gcc.target/i386/avx2-gather-2.c       | 2 +-
 gcc/testsuite/gcc.target/i386/avx2-gather-6.c       | 2 +-
 gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c   | 2 +-
 gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c   | 2 +-
 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c  | 2 +-
 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +-
 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c  | 2 +-
 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr88531-1b.c          | 2 +-
 gcc/testsuite/gcc.target/i386/pr88531-1c.c          | 2 +-
 12 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index fb2ed942f..9617fc162 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3.  If not see
 #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
 #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
 #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
+/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828.
+   Software mitigation.  */
+#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \
+	       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+	       | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
 
 #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
 #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index e6b9e2125..4392709fc 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
 /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
-	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
+	    | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
    elements.  */
@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
 /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
-	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 |  m_ALDERLAKE | m_GENERIC))
+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
+	    | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
    elements.  */
@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
 /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
-	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
+	    | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
    elements.  */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
index ad5ef7310..978924b0f 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */
 
 #include "avx2-gather-1.c"
 
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
index b9119581a..067b251e3 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */
+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details  -mtune=haswell" } */
 
 #include "avx2-gather-5.c"
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
index 06d21bb01..d1a229861 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
index 462e951fd..d7b0b2b28 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
index 55a28dddb..07439185e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
index 969600885..3a9810827 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
index 6b0c8a859..ac669e048 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
index 3af568ab3..14a1083b6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/88464 */
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
index 812c8a10f..e6df789de 100644
--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=skylake -mfpmath=sse" } */
+/* { dg-options "-O3 -march=skylake -mfpmath=sse -mtune=haswell" } */
 
 #include "pr88531-1a.c"
 
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1c.c b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
index 43fc5913e..a093c87c0 100644
--- a/gcc/testsuite/gcc.target/i386/pr88531-1c.c
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse" } */
+/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse -mtune=haswell" } */
 
 #include "pr88531-1a.c"
 
-- 
2.31.1