summaryrefslogtreecommitdiff
path: root/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
diff options
context:
space:
mode:
Diffstat (limited to '0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch')
-rw-r--r--0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch68
1 files changed, 68 insertions, 0 deletions
diff --git a/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch b/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
new file mode 100644
index 0000000..d365246
--- /dev/null
+++ b/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
@@ -0,0 +1,68 @@
+From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001
+From: Hongyu Wang <hongyu.wang@intel.com>
+Date: Sun, 25 Jun 2023 09:50:21 +0800
+Subject: [PATCH 04/28] i386: Sync tune_string with arch_string for target
+ attribute
+
+arch=*
+
+For function with target attribute arch=*, current logic will set its
+tune to -mtune from command line so all target_clones will get same
+tuning flags which would affect the performance for each clone. Override
+tune with arch if tune was not explicitly specified to get proper tuning
+flags for target_clones.
+
+gcc/ChangeLog:
+
+ * config/i386/i386-options.cc (ix86_valid_target_attribute_tree):
+ Override tune_string with arch_string if tune_string is not
+ explicitly specified.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/mvc17.c: New test.
+
+(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8)
+---
+ gcc/config/i386/i386-options.cc | 6 +++++-
+ gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c
+
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 74e969b68..fb2ed942f 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
+ if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
+ opts->x_ix86_tune_string
+ = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
+- else if (orig_tune_defaulted)
++ /* If we have explicit arch string and no tune string specified, set
+ tune_string to NULL and later it will be overridden by arch_string
++ so target clones can get proper optimization. */
++ else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
++ || orig_tune_defaulted)
+ opts->x_ix86_tune_string = NULL;
+
+ /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
+diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c
+new file mode 100644
+index 000000000..8b83c1aec
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/mvc17.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-require-ifunc "" } */
++/* { dg-options "-O2 -march=x86-64" } */
++/* { dg-final { scan-assembler-times "rep mov" 1 } } */
++
++__attribute__((target_clones("default","arch=icelake-server")))
++void
++foo (char *a, char *b, int size)
++{
++ __builtin_memcpy (a, b, size & 0x7F);
++}
+--
+2.31.1
+