diff options
Diffstat (limited to '0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch')
-rw-r--r-- | 0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch b/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch new file mode 100644 index 0000000..d365246 --- /dev/null +++ b/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch @@ -0,0 +1,68 @@ +From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Sun, 25 Jun 2023 09:50:21 +0800 +Subject: [PATCH 04/28] i386: Sync tune_string with arch_string for target + attribute + +arch=* + +For function with target attribute arch=*, current logic will set its +tune to -mtune from command line so all target_clones will get same +tuning flags which would affect the performance for each clone. Override +tune with arch if tune was not explicitly specified to get proper tuning +flags for target_clones. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): + Override tune_string with arch_string if tune_string is not + explicitly specified. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/mvc17.c: New test. 
+ +(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8) +--- + gcc/config/i386/i386-options.cc | 6 +++++- + gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 74e969b68..fb2ed942f 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, + if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) + opts->x_ix86_tune_string + = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); +- else if (orig_tune_defaulted) ++ /* If we have explicit arch string and no tune string specified, set ++ tune_string to NULL and later it will be overridden by arch_string ++ so target clones can get proper optimization. */ ++ else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] ++ || orig_tune_defaulted) + opts->x_ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ +diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c +new file mode 100644 +index 000000000..8b83c1aec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/mvc17.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-ifunc "" } */ ++/* { dg-options "-O2 -march=x86-64" } */ ++/* { dg-final { scan-assembler-times "rep mov" 1 } } */ ++ ++__attribute__((target_clones("default","arch=icelake-server"))) ++void ++foo (char *a, char *b, int size) ++{ ++ __builtin_memcpy (a, b, size & 0x7F); ++} +-- +2.31.1 + |