1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
|
From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 5 Jan 2021 16:35:22 +0100
Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
^ y [PR96928]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
As requested in the PR, the one's complement abs can be done more
efficiently without cmov or branching.
The ifcvt-onecmpl-abs-1.c testcase had to change because we no longer
optimize it in ifcvt. On x86_64 with -m32 we end up generating exactly the
same code, but with -m64:
movl %edi, %eax
- notl %eax
- cmpl %edi, %eax
- cmovl %edi, %eax
+ sarl $31, %eax
+ xorl %edi, %eax
ret
2021-01-05 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/96928
* tree-ssa-phiopt.c (xor_replacement): New function.
(tree_ssa_phiopt_worker): Call it.
* gcc.dg/tree-ssa/pr96928.c: New test.
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
instead of scanning rtl dump for ifcvt message check assembly
for xor instruction.
---
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
2 files changed, 146 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
new file mode 100644
index 000000000..209135726
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
@@ -0,0 +1,38 @@
+/* PR tree-optimization/96928 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
+
+int
+foo (int a)
+{
+ return a < 0 ? ~a : a; /* Same operand is tested and complemented. */
+}
+
+int
+bar (int a, int b)
+{
+ return a < 0 ? ~b : b; /* Signed value selected by a separate operand. */
+}
+
+unsigned
+baz (int a, unsigned int b)
+{
+ return a < 0 ? ~b : b; /* Unsigned value with a signed condition operand. */
+}
+
+unsigned
+qux (int a, unsigned int c)
+{
+ return a >= 0 ? ~c : c; /* Inverted form; the dump is scanned for one ~c. */
+}
+
+int
+corge (int a, int b)
+{
+ return a >= 0 ? b : ~b; /* GE_EXPR with swapped arms; same value as bar. */
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 707a5882e..b9cd07a60 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
+static bool xor_replacement (basic_block, basic_block,
+ edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
edge, edge, gimple *,
tree, tree);
@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
+ else if (!early_p
+ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
else if (!early_p
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. Return true if done. */
+
+static bool
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e0 ATTRIBUTE_UNUSED, edge e1,
+ gimple *phi, tree arg0, tree arg1)
+{
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
+ return false;
+
+ /* MIDDLE_BB must have only one executable statement which must have the
+ form arg0 = ~arg1 or arg1 = ~arg0. */
+
+ gimple *assign = last_and_only_stmt (middle_bb);
+ /* If we did not find the proper one's complement assignment, then we cannot
+ optimize. */
+ if (assign == NULL)
+ return false;
+
+ /* If we got here, then we have found the only executable statement
+ in MIDDLE_BB. If it is anything other than arg0 = ~arg1 or
+ arg1 = ~arg0, then we cannot optimize. */
+ if (!is_gimple_assign (assign))
+ return false;
+
+ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
+ return false;
+
+ tree lhs = gimple_assign_lhs (assign);
+ tree rhs = gimple_assign_rhs1 (assign);
+
+ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */
+ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
+ return false;
+
+ gimple *cond = last_stmt (cond_bb);
+ tree result = PHI_RESULT (phi);
+
+ /* Only LT_EXPR and GE_EXPR comparisons are interesting. */
+ enum tree_code cond_code = gimple_cond_code (cond);
+ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
+ return false;
+
+ /* Make sure the conditional is x OP 0, with x signed and as wide as arg1. */
+ tree clhs = gimple_cond_lhs (cond);
+ if (TREE_CODE (clhs) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
+ || TYPE_UNSIGNED (TREE_TYPE (clhs))
+ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
+ || !integer_zerop (gimple_cond_rhs (cond)))
+ return false;
+
+ /* We need to know which is the true edge and which is the false
+ edge so that we know if we have xor or inverted xor. */
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+
+ /* For GE_EXPR, if the true edge goes to MIDDLE_BB, then we
+ will need to invert the result. Similarly for LT_EXPR if
+ the false edge goes to MIDDLE_BB. */
+ edge e;
+ if (cond_code == GE_EXPR)
+ e = true_edge;
+ else
+ e = false_edge;
+
+ bool invert = e->dest == middle_bb;
+
+ result = duplicate_ssa_name (result, NULL);
+
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+
+ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
+ gimple *new_stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
+ build_int_cst (integer_type_node, prec - 1));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ NOP_EXPR, gimple_assign_lhs (new_stmt));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ }
+ lhs = gimple_assign_lhs (new_stmt);
+
+ if (invert)
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ BIT_NOT_EXPR, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ rhs = gimple_assign_lhs (new_stmt);
+ }
+
+ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
+
+ /* Note that we optimized this PHI. */
+ return true;
+}
+
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
2.27.0.windows.1
|