summaryrefslogtreecommitdiff
path: root/0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch
blob: 6f89af0b59810287568674b79cd19e55140738aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.wang@intel.com>
Date: Sat, 19 Nov 2022 09:38:00 +0800
Subject: [PATCH 13/22] i386: Only enable small loop unrolling in backend [PR
 107692]

Following the discussion in PR 107692, -munroll-only-small-loops
does not turn -funroll-loops on or off, and the current check in
pass_rtl_unroll_loops::gate would cause -fno-unroll-loops to not take
effect. Revert the change about targetm.loop_unroll_adjust and apply
the backend option change to strictly follow the rule that
-funroll-loops takes full control of loop unrolling, and
-munroll-only-small-loops just changes its behavior to unroll only
small loops.

gcc/ChangeLog:

	PR target/107692
	* common/config/i386/i386-common.cc
	(ix86_option_optimization_table): Enable loop unrolling at -O2,
	disable -fweb and -frename-registers by default.
	* config/i386/i386-options.cc
	(ix86_override_options_after_change):
	Disable small loop unroll when funroll-loops enabled, reset
	cunroll_grow_size when it is not explicitly enabled.
	(ix86_option_override_internal): Call
	ix86_override_options_after_change instead of calling
	ix86_recompute_optlev_based_flags and ix86_default_align
	separately.
	* config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll
	factor if -munroll-only-small-loops enabled.
	* loop-init.cc (pass_rtl_unroll_loops::gate): Do not enable
	loop unrolling for -O2-speed.
	(pass_rtl_unroll_loops::execute): Remove
	targetm.loop_unroll_adjust check.

gcc/testsuite/ChangeLog:

	PR target/107692
	* gcc.dg/guality/loop-1.c: Remove additional option for ia32.
	* gcc.target/i386/pr86270.c: Add -fno-unroll-loops.
	* gcc.target/i386/pr93002.c: Likewise.
---
 gcc/common/config/i386/i386-common.cc   |  8 ++++++
 gcc/config/i386/i386-options.cc         | 34 ++++++++++++++++++++++---
 gcc/config/i386/i386.cc                 | 18 ++++---------
 gcc/loop-init.cc                        | 10 +++-----
 gcc/testsuite/gcc.dg/guality/loop-1.c   |  2 --
 gcc/testsuite/gcc.target/i386/pr86270.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr93002.c |  2 +-
 7 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index cdd5caa55..f650e255f 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1687,7 +1687,15 @@ static const struct default_options ix86_option_optimization_table[] =
     /* The STC algorithm produces the smallest code at -Os, for x86.  */
     { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL,
       REORDER_BLOCKS_ALGORITHM_STC },
+
+    /* Turn on -funroll-loops with -munroll-only-small-loops to enable small
+       loop unrolling at -O2.  */
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
+    /* Turn off -frename-registers and -fweb, which are enabled by
+       -funroll-loops.  */
+    { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
+    { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
     /* Turn off -fschedule-insns by default.  It tends to make the
        problem with not enough registers even worse.  */
     { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 099cec4b6..ff44ad4e0 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1816,8 +1816,37 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
 void
 ix86_override_options_after_change (void)
 {
+  /* Default align_* from the processor table.  */
   ix86_default_align (&global_options);
+
   ix86_recompute_optlev_based_flags (&global_options, &global_options_set);
+
+  /* Disable unrolling small loops when there's explicit
+     -f{,no-}unroll-loops.  */
+  if ((OPTION_SET_P (flag_unroll_loops))
+     || (OPTION_SET_P (flag_unroll_all_loops)
+	 && flag_unroll_all_loops))
+    {
+      if (!OPTION_SET_P (ix86_unroll_only_small_loops))
+	ix86_unroll_only_small_loops = 0;
+      /* Re-enable -frename-registers and -fweb if funroll-loops
+	 enabled.  */
+      if (!OPTION_SET_P (flag_web))
+	flag_web = flag_unroll_loops;
+      if (!OPTION_SET_P (flag_rename_registers))
+	flag_rename_registers = flag_unroll_loops;
+      /* -fcunroll-grow-size default follows -f[no-]unroll-loops.  */
+      if (!OPTION_SET_P (flag_cunroll_grow_size))
+	flag_cunroll_grow_size = flag_unroll_loops
+				 || flag_peel_loops
+				 || optimize >= 3;
+    }
+  else
+    {
+      if (!OPTION_SET_P (flag_cunroll_grow_size))
+	flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
+    }
+
 }
 
 /* Clear stack slot assignments remembered from previous functions.
@@ -2329,7 +2358,7 @@ ix86_option_override_internal (bool main_args_p,
 
   set_ix86_tune_features (opts, ix86_tune, opts->x_ix86_dump_tunes);
 
-  ix86_recompute_optlev_based_flags (opts, opts_set);
+  ix86_override_options_after_change ();
 
   ix86_tune_cost = processor_cost_table[ix86_tune];
   /* TODO: ix86_cost should be chosen at instruction or function granuality
@@ -2360,9 +2389,6 @@ ix86_option_override_internal (bool main_args_p,
       || TARGET_64BIT_P (opts->x_ix86_isa_flags))
     opts->x_ix86_regparm = REGPARM_MAX;
 
-  /* Default align_* from the processor table.  */
-  ix86_default_align (opts);
-
   /* Provide default for -mbranch-cost= value.  */
   SET_OPTION_IF_UNSET (opts, opts_set, ix86_branch_cost,
 		       ix86_tune_cost->branch_cost);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e56004300..462dce10e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23572,20 +23572,12 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
 
   /* Unroll small size loop when unroll factor is not explicitly
      specified.  */
-  if (!(flag_unroll_loops
-	|| flag_unroll_all_loops
-	|| loop->unroll))
+  if (ix86_unroll_only_small_loops && !loop->unroll)
     {
-      nunroll = 1;
-
-      /* Any explicit -f{no-}unroll-{all-}loops turns off
-	 -munroll-only-small-loops.  */
-      if (ix86_unroll_only_small_loops
-	  && !OPTION_SET_P (flag_unroll_loops)
-	  && loop->ninsns <= ix86_cost->small_unroll_ninsns)
-	nunroll = ix86_cost->small_unroll_factor;
-
-      return nunroll;
+      if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
+	return MIN (nunroll, ix86_cost->small_unroll_factor);
+      else
+	return 1;
     }
 
   if (!TARGET_ADJUST_UNROLL)
diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc
index f1c717041..1e4f6cfd7 100644
--- a/gcc/loop-init.cc
+++ b/gcc/loop-init.cc
@@ -565,12 +565,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *fun)
+  virtual bool gate (function *)
     {
-      return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll
-	      || (targetm.loop_unroll_adjust
-		  && optimize >= 2
-		  && optimize_function_for_speed_p (fun)));
+      return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
@@ -586,8 +583,7 @@ pass_rtl_unroll_loops::execute (function *fun)
       if (dump_file)
 	df_dump (dump_file);
 
-      if (flag_unroll_loops
-	  || targetm.loop_unroll_adjust)
+      if (flag_unroll_loops)
 	flags |= UAP_UNROLL;
       if (flag_unroll_all_loops)
 	flags |= UAP_UNROLL_ALL;
diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c
index a32ea445a..1b1f6d322 100644
--- a/gcc/testsuite/gcc.dg/guality/loop-1.c
+++ b/gcc/testsuite/gcc.dg/guality/loop-1.c
@@ -1,7 +1,5 @@
 /* { dg-do run } */
 /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */
-/* { dg-additional-options "-mno-unroll-only-small-loops" { target ia32 } } */
-
 
 #include "../nop.h"
 
diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c
index cbc9fbb04..98b012caf 100644
--- a/gcc/testsuite/gcc.target/i386/pr86270.c
+++ b/gcc/testsuite/gcc.target/i386/pr86270.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */
+/* { dg-options "-O2 -fno-unroll-loops" } */
 
 int *a;
 long len;
diff --git a/gcc/testsuite/gcc.target/i386/pr93002.c b/gcc/testsuite/gcc.target/i386/pr93002.c
index f75a847f7..7e2d869e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr93002.c
+++ b/gcc/testsuite/gcc.target/i386/pr93002.c
@@ -1,6 +1,6 @@
 /* PR target/93002 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */
+/* { dg-options "-O2 -fno-unroll-loops" } */
 /* { dg-final { scan-assembler-not "cmp\[^\n\r]*-1" } } */
 
 volatile int sink;
-- 
2.33.0