summaryrefslogtreecommitdiff
path: root/0257-Make-option-mvzeroupper-independent-of-optimization-.patch
blob: ffdffb163293589aa92a1bc71bb4e1f38e48f289 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Mon, 26 Jun 2023 09:50:25 +0800
Subject: [PATCH 03/28] Make option mvzeroupper independent of optimization
 level.

pass_insert_vzeroupper is gated by the condition

TARGET_AVX && TARGET_VZEROUPPER
&& flag_expensive_optimizations && !optimize_size

But the documentation of -mvzeroupper doesn't mention that the
insertion requires -O2 or above, which may confuse users who
explicitly use -Os -mvzeroupper.

------------
mvzeroupper
Target Mask(VZEROUPPER) Save
Generate vzeroupper instruction before a transfer of control flow out of
the function.
------------

The patch moves flag_expensive_optimizations && !optimize_size to
ix86_option_override_internal. This makes -mvzeroupper independent of
the optimization level, while keeping the behavior of architecture
tuning (emit_vzeroupper) unchanged.

gcc/ChangeLog:

	* config/i386/i386-features.cc (pass_insert_vzeroupper::gate):
	Move flag_expensive_optimizations && !optimize_size to ...
	* config/i386/i386-options.cc (ix86_option_override_internal):
	... this.  It makes -mvzeroupper independent of the optimization
	level, but still keeps the behavior of architecture
	tuning (emit_vzeroupper) unchanged.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-vzeroupper-29.c: New testcase.
	* gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
	* gcc.target/i386/avx-vzeroupper-7.c: Ditto.
	* gcc.target/i386/avx-vzeroupper-9.c: Ditto.
---
 gcc/config/i386/i386-features.cc                  |  3 +--
 gcc/config/i386/i386-options.cc                   |  4 +++-
 gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c |  3 ++-
 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c  |  3 ++-
 gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c  |  3 ++-
 6 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 6fe41c3c2..6a2444eb6 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1875,8 +1875,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return TARGET_AVX && TARGET_VZEROUPPER
-	&& flag_expensive_optimizations && !optimize_size;
+      return TARGET_AVX && TARGET_VZEROUPPER;
     }
 
   virtual unsigned int execute (function *)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index ff44ad4e0..74e969b68 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
     sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
 
   if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
-      && TARGET_EMIT_VZEROUPPER)
+      && TARGET_EMIT_VZEROUPPER
+      && flag_expensive_optimizations
+      && !optimize_size)
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
     opts->x_target_flags |= MASK_STV;
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
index e694d4048..5a40e8783 100644
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
@@ -16,5 +16,6 @@ foo ()
   _mm256_zeroupper ();
 }
 
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
new file mode 100644
index 000000000..4af637757
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
+
+#include <immintrin.h>
+
+extern __m256 x, y;
+
+void
+foo ()
+{
+  x = y;
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
index ab6d68779..75fe58897 100644
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
@@ -12,4 +12,5 @@ foo ()
   _mm256_zeroupper ();
 }
 
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
index 974e1626a..fa0a6dfca 100644
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
@@ -15,4 +15,5 @@ foo ()
   _mm256_zeroupper ();
 }
 
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
-- 
2.31.1