summary | refs | log | tree | commit | diff
path: root/0257-Make-option-mvzeroupper-independent-of-optimization-.patch
diff options
context:
space:
mode:
Diffstat (limited to '0257-Make-option-mvzeroupper-independent-of-optimization-.patch')
-rw-r--r-- 0257-Make-option-mvzeroupper-independent-of-optimization-.patch | 138
1 files changed, 138 insertions, 0 deletions
diff --git a/0257-Make-option-mvzeroupper-independent-of-optimization-.patch b/0257-Make-option-mvzeroupper-independent-of-optimization-.patch
new file mode 100644
index 0000000..ffdffb1
--- /dev/null
+++ b/0257-Make-option-mvzeroupper-independent-of-optimization-.patch
@@ -0,0 +1,138 @@
+From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Mon, 26 Jun 2023 09:50:25 +0800
+Subject: [PATCH 03/28] Make option mvzeroupper independent of optimization
+ level.
+
+pass_insert_vzeroupper is under condition
+
+TARGET_AVX && TARGET_VZEROUPPER
+&& flag_expensive_optimizations && !optimize_size
+
+But the documentation of -mvzeroupper doesn't mention that the
+insertion requires -O2 or above, which may confuse users when they
+explicitly use -Os -mvzeroupper.
+
+------------
+mvzeroupper
+Target Mask(VZEROUPPER) Save
+Generate vzeroupper instruction before a transfer of control flow out of
+the function.
+------------
+
+The patch moves the flag_expensive_optimizations && !optimize_size
+check to ix86_option_override_internal. This makes -mvzeroupper
+independent of the optimization level, while keeping the behavior of
+architecture tuning (emit_vzeroupper) unchanged.
+
+gcc/ChangeLog:
+
+ * config/i386/i386-features.cc (pass_insert_vzeroupper:gate):
+ Move flag_expensive_optimizations && !optimize_size to ..
+ * config/i386/i386-options.cc (ix86_option_override_internal):
+ .. this, it makes -mvzeroupper independent of optimization
+ level, but still keeps the behavior of architecture
+ tuning(emit_vzeroupper) unchanged.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/avx-vzeroupper-29.c: New testcase.
+ * gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
+ * gcc.target/i386/avx-vzeroupper-7.c: Ditto.
+ * gcc.target/i386/avx-vzeroupper-9.c: Ditto.
+---
+ gcc/config/i386/i386-features.cc | 3 +--
+ gcc/config/i386/i386-options.cc | 4 +++-
+ gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++-
+ gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
+ gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++-
+ gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++-
+ 6 files changed, 24 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
+
+diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
+index 6fe41c3c2..6a2444eb6 100644
+--- a/gcc/config/i386/i386-features.cc
++++ b/gcc/config/i386/i386-features.cc
+@@ -1875,8 +1875,7 @@ public:
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+- return TARGET_AVX && TARGET_VZEROUPPER
+- && flag_expensive_optimizations && !optimize_size;
++ return TARGET_AVX && TARGET_VZEROUPPER;
+ }
+
+ virtual unsigned int execute (function *)
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index ff44ad4e0..74e969b68 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
+ sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
+
+ if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
+- && TARGET_EMIT_VZEROUPPER)
++ && TARGET_EMIT_VZEROUPPER
++ && flag_expensive_optimizations
++ && !optimize_size)
+ opts->x_target_flags |= MASK_VZEROUPPER;
+ if (!(opts_set->x_target_flags & MASK_STV))
+ opts->x_target_flags |= MASK_STV;
+diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
+index e694d4048..5a40e8783 100644
+--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
+@@ -16,5 +16,6 @@ foo ()
+ _mm256_zeroupper ();
+ }
+
+-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
+ /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
+new file mode 100644
+index 000000000..4af637757
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
++
++#include <immintrin.h>
++
++extern __m256 x, y;
++
++void
++foo ()
++{
++ x = y;
++}
++
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
+index ab6d68779..75fe58897 100644
+--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
+@@ -12,4 +12,5 @@ foo ()
+ _mm256_zeroupper ();
+ }
+
+-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
+diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
+index 974e1626a..fa0a6dfca 100644
+--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
+@@ -15,4 +15,5 @@ foo ()
+ _mm256_zeroupper ();
+ }
+
+-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
+--
+2.31.1
+