summaryrefslogtreecommitdiff
path: root/0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
diff options
context:
space:
mode:
Diffstat (limited to '0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch')
-rw-r--r--0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch187
1 files changed, 187 insertions, 0 deletions
diff --git a/0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch b/0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
new file mode 100644
index 0000000..89a7fe7
--- /dev/null
+++ b/0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
@@ -0,0 +1,187 @@
+From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 10 Aug 2023 16:26:13 +0800
+Subject: [PATCH 10/28] Support -m[no-]gather -m[no-]scatter to enable/disable
+ vectorization for all gather/scatter instructions
+
+Rename original use_gather to use_gather_8parts. Support
+-mtune-ctrl={,^}use_gather to set/clear tune features
+use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
+as an alias of -mtune-ctrl=, use_gather, ^use_gather.
+
+Similar for use_scatter.
+
+gcc/ChangeLog:
+
+ * config/i386/i386-builtins.cc
+ (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
+ * config/i386/i386-options.cc (parse_mtune_ctrl_str):
+ Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
+ 8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
+ * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
+ for use_scatter_8parts.
+ * config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
+ (TARGET_USE_GATHER_8PARTS): .. this.
+ (TARGET_USE_SCATTER): Rename to ..
+ (TARGET_USE_SCATTER_8PARTS): .. this.
+ * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to ..
+ (X86_TUNE_USE_GATHER_8PARTS): .. this.
+ (X86_TUNE_USE_SCATTER): Rename to ..
+ (X86_TUNE_USE_SCATTER_8PARTS): .. this.
+ * config/i386/i386.opt: Add new options mgather, mscatter.
+
+(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
+---
+ gcc/config/i386/i386-builtins.cc | 2 +-
+ gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
+ gcc/config/i386/i386.cc | 2 +-
+ gcc/config/i386/i386.h | 8 ++---
+ gcc/config/i386/i386.opt | 4 +++
+ gcc/config/i386/x86-tune.def | 4 +--
+ 6 files changed, 52 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
+index 050c6228a..8ed32e14f 100644
+--- a/gcc/config/i386/i386-builtins.cc
++++ b/gcc/config/i386/i386-builtins.cc
+@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
+ ? !TARGET_USE_GATHER_2PARTS
+ : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
+ ? !TARGET_USE_GATHER_4PARTS
+- : !TARGET_USE_GATHER)))
++ : !TARGET_USE_GATHER_8PARTS)))
+ return NULL_TREE;
+
+ if ((TREE_CODE (index_type) != INTEGER_TYPE
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 9617fc162..3df1f0c41 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
+ curr_feature_string++;
+ clear = true;
+ }
+- for (i = 0; i < X86_TUNE_LAST; i++)
+- {
+- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
+- {
+- ix86_tune_features[i] = !clear;
+- if (dump)
+- fprintf (stderr, "Explicitly %s feature %s\n",
+- clear ? "clear" : "set", ix86_tune_feature_names[i]);
+- break;
+- }
+- }
+- if (i == X86_TUNE_LAST)
+- error ("unknown parameter to option %<-mtune-ctrl%>: %s",
+- clear ? curr_feature_string - 1 : curr_feature_string);
++
++ if (!strcmp (curr_feature_string, "use_gather"))
++ {
++ ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
++ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
++ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
++ if (dump)
++ fprintf (stderr, "Explicitly %s features use_gather_2parts,"
++ " use_gather_4parts, use_gather_8parts\n",
++ clear ? "clear" : "set");
++
++ }
++ else if (!strcmp (curr_feature_string, "use_scatter"))
++ {
++ ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
++ ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
++ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
++ if (dump)
++ fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
++ " use_scatter_4parts, use_scatter_8parts\n",
++ clear ? "clear" : "set");
++ }
++ else
++ {
++ for (i = 0; i < X86_TUNE_LAST; i++)
++ {
++ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
++ {
++ ix86_tune_features[i] = !clear;
++ if (dump)
++ fprintf (stderr, "Explicitly %s feature %s\n",
++ clear ? "clear" : "set", ix86_tune_feature_names[i]);
++ break;
++ }
++ }
++
++ if (i == X86_TUNE_LAST)
++ error ("unknown parameter to option %<-mtune-ctrl%>: %s",
++ clear ? curr_feature_string - 1 : curr_feature_string);
++ }
+ curr_feature_string = next_feature_string;
+ }
+ while (curr_feature_string);
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index 479fc6010..e75d37023 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
+ ? !TARGET_USE_SCATTER_2PARTS
+ : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
+ ? !TARGET_USE_SCATTER_4PARTS
+- : !TARGET_USE_SCATTER))
++ : !TARGET_USE_SCATTER_8PARTS))
+ return NULL_TREE;
+
+ if ((TREE_CODE (index_type) != INTEGER_TYPE
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 688aaabd3..aaa136ba0 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
+ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
+ #define TARGET_USE_SCATTER_4PARTS \
+ ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
+-#define TARGET_USE_GATHER \
+- ix86_tune_features[X86_TUNE_USE_GATHER]
+-#define TARGET_USE_SCATTER \
+- ix86_tune_features[X86_TUNE_USE_SCATTER]
++#define TARGET_USE_GATHER_8PARTS \
++ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
++#define TARGET_USE_SCATTER_8PARTS \
++ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
+ #define TARGET_FUSE_CMP_AND_BRANCH_32 \
+ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
+ #define TARGET_FUSE_CMP_AND_BRANCH_64 \
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index 498fb454d..b154110d8 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
+ munroll-only-small-loops
+ Target Var(ix86_unroll_only_small_loops) Init(0) Save
+ Enable conservative small loop unrolling.
++
++mscatter
++Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
++Enable vectorization for scatter instruction.
+diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
+index 4392709fc..bdb455d20 100644
+--- a/gcc/config/i386/x86-tune.def
++++ b/gcc/config/i386/x86-tune.def
+@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
+
+ /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
+ elements. */
+-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
++DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
+ | m_GENERIC | m_GDS))
+
+ /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
+ elements. */
+-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
++DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
+ ~(m_ZNVER4))
+
+ /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
+--
+2.31.1
+