| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
 | From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 10 Aug 2023 16:26:13 +0800
Subject: [PATCH 10/28] Support -m[no-]gather -m[no-]scatter to enable/disable
 vectorization for all gather/scatter instructions
Rename the original use_gather to use_gather_8parts. Support
-mtune-ctrl={,^}use_gather to set/clear the tune features
use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
as an alias of -mtune-ctrl=use_gather, and -mno-gather as an alias of
-mtune-ctrl=^use_gather.
Similar for use_scatter.
gcc/ChangeLog:
	* config/i386/i386-builtins.cc
	(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
	* config/i386/i386-options.cc (parse_mtune_ctrl_str):
	Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
	8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
	* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
	for use_scatter_8parts.
	* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
	(TARGET_USE_GATHER_8PARTS): .. this.
	(TARGET_USE_SCATTER): Rename to ..
	(TARGET_USE_SCATTER_8PARTS): .. this.
	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to ..
	(X86_TUNE_USE_GATHER_8PARTS): .. this.
	(X86_TUNE_USE_SCATTER): Rename to ..
	(X86_TUNE_USE_SCATTER_8PARTS): .. this.
	* config/i386/i386.opt: Add new options mgather, mscatter.
(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
---
 gcc/config/i386/i386-builtins.cc |  2 +-
 gcc/config/i386/i386-options.cc  | 54 +++++++++++++++++++++++---------
 gcc/config/i386/i386.cc          |  2 +-
 gcc/config/i386/i386.h           |  8 ++---
 gcc/config/i386/i386.opt         |  4 +++
 gcc/config/i386/x86-tune.def     |  4 +--
 6 files changed, 52 insertions(+), 22 deletions(-)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 050c6228a..8ed32e14f 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
 	  ? !TARGET_USE_GATHER_2PARTS
 	  : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
 	     ? !TARGET_USE_GATHER_4PARTS
-	     : !TARGET_USE_GATHER)))
+	     : !TARGET_USE_GATHER_8PARTS)))
     return NULL_TREE;
 
   if ((TREE_CODE (index_type) != INTEGER_TYPE
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 9617fc162..3df1f0c41 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
           curr_feature_string++;
           clear = true;
         }
-      for (i = 0; i < X86_TUNE_LAST; i++)
-        {
-          if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
-            {
-              ix86_tune_features[i] = !clear;
-              if (dump)
-                fprintf (stderr, "Explicitly %s feature %s\n",
-                         clear ? "clear" : "set", ix86_tune_feature_names[i]);
-              break;
-            }
-        }
-      if (i == X86_TUNE_LAST)
-	error ("unknown parameter to option %<-mtune-ctrl%>: %s",
-	       clear ? curr_feature_string - 1 : curr_feature_string);
+
+      if (!strcmp (curr_feature_string, "use_gather"))
+	{
+	  ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
+	  ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
+	  ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
+	  if (dump)
+	    fprintf (stderr, "Explicitly %s features use_gather_2parts,"
+		     " use_gather_4parts, use_gather_8parts\n",
+		     clear ? "clear" : "set");
+
+	}
+      else if (!strcmp (curr_feature_string, "use_scatter"))
+	{
+	  ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
+	  ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
+	  ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
+	  if (dump)
+	    fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
+		     " use_scatter_4parts, use_scatter_8parts\n",
+		     clear ? "clear" : "set");
+	}
+      else
+	{
+	  for (i = 0; i < X86_TUNE_LAST; i++)
+	    {
+	      if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
+		{
+		  ix86_tune_features[i] = !clear;
+		  if (dump)
+		    fprintf (stderr, "Explicitly %s feature %s\n",
+			     clear ? "clear" : "set", ix86_tune_feature_names[i]);
+		  break;
+		}
+	    }
+
+	  if (i == X86_TUNE_LAST)
+	    error ("unknown parameter to option %<-mtune-ctrl%>: %s",
+		   clear ? curr_feature_string - 1 : curr_feature_string);
+	}
       curr_feature_string = next_feature_string;
     }
   while (curr_feature_string);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 479fc6010..e75d37023 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
       ? !TARGET_USE_SCATTER_2PARTS
       : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
 	 ? !TARGET_USE_SCATTER_4PARTS
-	 : !TARGET_USE_SCATTER))
+	 : !TARGET_USE_SCATTER_8PARTS))
     return NULL_TREE;
 
   if ((TREE_CODE (index_type) != INTEGER_TYPE
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 688aaabd3..aaa136ba0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
 #define TARGET_USE_SCATTER_4PARTS \
 	ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
-#define TARGET_USE_GATHER \
-	ix86_tune_features[X86_TUNE_USE_GATHER]
-#define TARGET_USE_SCATTER \
-	ix86_tune_features[X86_TUNE_USE_SCATTER]
+#define TARGET_USE_GATHER_8PARTS \
+	ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
+#define TARGET_USE_SCATTER_8PARTS \
+	ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
 #define TARGET_FUSE_CMP_AND_BRANCH_32 \
 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
 #define TARGET_FUSE_CMP_AND_BRANCH_64 \
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 498fb454d..b154110d8 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
 munroll-only-small-loops
 Target Var(ix86_unroll_only_small_loops) Init(0) Save
 Enable conservative small loop unrolling.
+
+mscatter
+Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
+Enable vectorization for scatter instruction.
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 4392709fc..bdb455d20 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
 
 /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
    elements.  */
-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
+DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
 	    | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
    elements.  */
-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
+DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
 	  ~(m_ZNVER4))
 
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
-- 
2.31.1
 |