summaryrefslogtreecommitdiff
path: root/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
blob: 6f99e5cd8d50b9b5c17e50976412c7aab94aebbc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Sat, 29 Jul 2023 18:27:10 +0800
Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp
 instruction

Add flag -fccmp2.
Enable use of the ccmp instruction by adding extra conflicts to the
coalescing conflict graph in cases where temporary expression
replacement would otherwise not be possible.
---
 gcc/ccmp.cc                               |  33 ++++
 gcc/ccmp.h                                |   1 +
 gcc/common.opt                            |   4 +
 gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  15 ++
 gcc/tree-ssa-coalesce.cc                  | 197 ++++++++++++++++++++++
 5 files changed, 250 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c

diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 3db0a264e..e34f3bcc6 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgexpand.h"
 #include "ccmp.h"
 #include "predict.h"
+#include "gimple-iterator.h"
 
 /* Check whether T is a simple boolean variable or a SSA name
    set by a comparison operator in the same basic block.  */
@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
   return false;
 }
 
+/* Check whether BB is a potential conditional compare candidate.  */
+bool
+check_ccmp_candidate (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  gimple *bb_last_stmt, *stmt;
+  tree op0, op1;
+
+  gsi = gsi_last_bb (bb);
+  bb_last_stmt = gsi_stmt (gsi);
+
+  if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
+    {
+      op0 = gimple_cond_lhs (bb_last_stmt);
+      op1 = gimple_cond_rhs (bb_last_stmt);
+
+      if (TREE_CODE (op0) == SSA_NAME
+	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
+	  && TREE_CODE (op1) == INTEGER_CST
+	  && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
+	      || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
+	{
+	  stmt = SSA_NAME_DEF_STMT (op0);
+	  if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
+	    {
+	      return ccmp_candidate_p (stmt);
+	    }
+	}
+    }
+  return false;
+}
+
 /* Extract the comparison we want to do from the tree.  */
 void
 get_compare_parts (tree t, int *up, rtx_code *rcode,
diff --git a/gcc/ccmp.h b/gcc/ccmp.h
index 1799d5fed..efe3a1c14 100644
--- a/gcc/ccmp.h
+++ b/gcc/ccmp.h
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_CCMP_H
 
 extern rtx expand_ccmp_expr (gimple *, machine_mode);
+extern bool check_ccmp_candidate (basic_block bb);
 
 #endif  /* GCC_CCMP_H  */
diff --git a/gcc/common.opt b/gcc/common.opt
index 4d91ce8cf..0aa516719 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2017,6 +2017,10 @@ fira-verbose=
 Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
 -fira-verbose=<number>	Control IRA's level of diagnostic messages.
 
+fccmp2
+Common Var(flag_ccmp2) Init(0) Optimization
+Optimize potential ccmp instruction in complex scenarios.
+
 fivopts
 Common Var(flag_ivopts) Init(1) Optimization
 Optimize induction variables on trees.
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 000000000..b509ba810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
+
+int func (int a, int b, int c)
+{
+  while(1)
+    {
+      if(a-- == 0 || b >= c)
+	{
+	  return 1;
+	}
+    }
+}
+
+/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc
index dccf41ab8..195e06428 100644
--- a/gcc/tree-ssa-coalesce.cc
+++ b/gcc/tree-ssa-coalesce.cc
@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "explow.h"
 #include "tree-dfa.h"
 #include "stor-layout.h"
+#include "ccmp.h"
+#include "target.h"
+#include "tree-outof-ssa.h"
 
 /* This set of routines implements a coalesce_list.  This is an object which
    is used to track pairs of ssa_names which are desirable to coalesce
@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
   bitmap_clear (&ptr->live_base_var);
 }
 
+/* Return true if GS is a copy assignment from one SSA_NAME to another.  */
+
+static inline bool
+gimple_is_assign_copy_p (gimple *gs)
+{
+  return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
+	  && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
+	  && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
+}
+
+#define MAX_CCMP_CONFLICT_NUM 5
+
+/* Drop all conflicts from CONFLICT_GRAPH if too many partitions have them.  */
+
+static void
+remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
+{
+  unsigned x = 0;
+  int add_conflict_num = 0;
+  bitmap b;
+  FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
+    {
+      if (b)
+	{
+	  add_conflict_num++;
+	}
+    }
+  if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
+    {
+      conflict_graph->conflicts.release ();
+    }
+}
+
+/* Walk BB backwards from its last statement down to (but excluding) the
+   first statement marked visited, updating LIVE and recording any new
+   conflicts in CONFLICT_GRAPH.  Do nothing if no statement is visited.  */
+
+static void
+process_add_graph (live_track *live, basic_block bb,
+		   ssa_conflicts *conflict_graph)
+{
+  tree use, def;
+  ssa_op_iter iter;
+  gimple *first_visit_stmt = NULL;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      if (gimple_visited_p (gsi_stmt (gsi)))
+	{
+	  first_visit_stmt = gsi_stmt (gsi);
+	  break;
+	}
+    }
+  if (!first_visit_stmt)
+    return;
+
+  for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
+       gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      /* Skip debug statements so they cannot affect liveness or conflicts.
+	 Only assignments are ever marked visited, so testing the visited
+	 flag together with is_gimple_debug would never skip anything.
+	 NOTE(review): visited statements are still processed here.  */
+      if (is_gimple_debug (stmt))
+	continue;
+      /* For a copy, clear the source from LIVE first; presumably so the
+	 copy itself does not make source and destination conflict.  */
+      if (gimple_is_assign_copy_p (stmt))
+	live_track_clear_var (live, gimple_assign_rhs1 (stmt));
+      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
+	live_track_process_def (live, def, conflict_graph);
+      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+	live_track_process_use (live, use);
+    }
+}
+
+/* Build a conflict graph based on ccmp candidate.  */
+
+static void
+add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
+			 tree_live_info_p liveinfo, var_map map, basic_block bb)
+{
+  live_track *live;
+  tree use, def;
+  ssa_op_iter iter;
+  live = new_live_track (map);
+  live_track_init (live, live_on_exit (liveinfo, bb));
+
+  gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
+  gcc_assert (gimple_cond_lhs (last_stmt));
+
+  auto_vec<tree> stack;
+  stack.safe_push (gimple_cond_lhs (last_stmt));
+  while (!stack.is_empty ())
+    {
+      tree op = stack.pop ();
+      gimple *op_stmt = SSA_NAME_DEF_STMT (op);
+      if (!op_stmt || gimple_bb (op_stmt) != bb
+	  || !is_gimple_assign (op_stmt)
+	  || !ssa_is_replaceable_p (op_stmt))
+	{
+	  continue;
+	}
+      if (gimple_is_assign_copy_p (op_stmt))
+	{
+	  live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
+	}
+      gimple_set_visited (op_stmt, true);
+      FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
+	{
+	  live_track_process_def (live, def, conflict_graph);
+	}
+      FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
+	{
+	  stack.safe_push (use);
+	  live_track_process_use (live, use);
+	}
+    }
+
+  process_add_graph (live, bb, conflict_graph);
+  delete_live_track (live);
+  remove_high_cost_graph_for_ccmp (conflict_graph);
+}
+
+/* Determine whether the ccmp conflict graph can be added.
+   For example:
+
+   ;;   basic block 3, loop depth 1
+   ;;    pred:		2
+   ;;		     	3
+   # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
+   _7 = b_4 (D) >= c_5 (D);
+   _8 = ivtmp.5_10 == 0;
+   _9 = _7 | _8;
+   ivtmp.5_11 = ivtmp.5_10 - 1;
+   if (_9 != 0)
+     goto <bb 4>; [10.70%]
+   else
+     goto <bb 3>; [89.30%]
+
+   In the above loop, the following expressions will be replaced:
+
+   _7 replaced by b_4 (D) >= c_5 (D)
+   _8 replaced by ivtmp.5_10 == 0
+
+   For this case to use the ccmp instruction,
+
+   _9 must also be replaced by _7 | _8
+
+   This requires that ivtmp.5_11 and ivtmp.5_10 be placed in different
+   partitions.
+
+   This function adds the conflicts needed to achieve that.  */
+
+static void
+determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
+				   var_map map, ssa_conflicts *graph)
+{
+  if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
+    return;
+  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
+       gsi_next (&bsi))
+    {
+      gimple_set_visited (gsi_stmt (bsi), false);
+    }
+  ssa_conflicts *ccmp_conflict_graph;
+  ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
+  add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
+  unsigned x;
+  bitmap b;
+  if (ccmp_conflict_graph)
+    {
+      FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
+	{
+	  if (!b)
+	    continue;
+	  unsigned y = bitmap_first_set_bit (b);
+	  if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y))
+	    {
+	      ssa_conflicts_add (graph, x, y);
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fprintf (dump_file, "potential ccmp: add additional "
+				      "conflict-ssa : bb[%d]  %d:%d\n",
+			   bb->index, x, y);
+		}
+	    }
+	}
+    }
+  ssa_conflicts_delete (ccmp_conflict_graph);
+}
 
 /* Build a conflict graph based on LIVEINFO.  Any partitions which are in the
    partition view of the var_map liveinfo is based on get entries in the
@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
 	    live_track_process_use (live, var);
 	}
 
+	determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
+
       /* If result of a PHI is unused, looping over the statements will not
 	 record any conflicts since the def was never live.  Since the PHI node
 	 is going to be translated out of SSA form, it will insert a copy.
-- 
2.33.0