summaryrefslogtreecommitdiff
path: root/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch
blob: d3acf4cef5228e8c9120db680fa198025bc38657 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 4 Nov 2020 11:55:29 +0100
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
 10 [PR97690]

Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab

The following patch generalizes the x ? 1 : 0 -> (int) x optimization
to handle also left shifts by constant.

During x86_64-linux and i686-linux bootstraps + regtests it triggered
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
filename, function name and shift count) and 1866 -m32 cases.

Unfortunately, the patch regresses (before the tests have been adjusted):
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects  scan-tree-dump-times slp1 "optimized: basic block" 1
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
and in both cases it actually results in worse code.

> > We'd need some optimization that would go through all PHI edges and
> > check whether some use of the PHI result actually computes a constant
> > across all the PHI edges - 1 & 0 and 0 & 1 are always 0.

> PRE should do this, IMHO only optimizing it at -O2 is fine.

> > Similarly, in the slp vectorization test there is:
> >      a[0] = b[0] ? 1 : 7;

> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...

> So the option is to put : 7 in the 2, 4 and 8 case as well.  The testcase
> wasn't added for any real-world case but is artificial I guess for
> COND_EXPR handling of invariants.

> But yeah, for things like SLP it means we eventually have to
> implement reverse transforms for all of this to make the lanes
> matching.  But that's true anyway for things like x + 1 vs. x + 0
> or x / 3 vs. x / 2 or other simplifications we do.

2020-11-04  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/97690
	* tree-ssa-phiopt.c (conditional_replacement): Also optimize
	cond ? pow2p_cst : 0 as ((type) cond) << cst.

	* gcc.dg/tree-ssa/phi-opt-22.c: New test.
	* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
	* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
	? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
---
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c   | 11 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c   |  2 +-
 gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c |  6 ++--
 gcc/tree-ssa-phiopt.c                        | 38 ++++++++++++++------
 4 files changed, 43 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
new file mode 100644
index 000000000..fd3706666
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/97690 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+
+int foo (_Bool d) { return d ? 2 : 0; }
+int bar (_Bool d) { return d ? 1 : 0; }
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
+int qux (_Bool d) { return d ? 1024 : 0; }
+
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
index 36b8e7fc8..d70ea5a01 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */ 
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 /* Test for CPROP across a DAG. */
 
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
index d32cb7585..e64f0115a 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
   for (i = 0; i < N/stride; i++, a += stride, b += stride)
    {
      a[0] = b[0] ? 1 : 7;
-     a[1] = b[1] ? 2 : 0;
+     a[1] = b[1] ? 2 : 7;
      a[2] = b[2] ? 3 : 0;
-     a[3] = b[3] ? 4 : 0;
+     a[3] = b[3] ? 4 : 7;
      a[4] = b[4] ? 5 : 0;
      a[5] = b[5] ? 6 : 0;
      a[6] = b[6] ? 7 : 0;
-     a[7] = b[7] ? 8 : 0;
+     a[7] = b[7] ? 8 : 7;
    }
 }
 
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 591b6435f..85587e8d1 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
   gimple_stmt_iterator gsi;
   edge true_edge, false_edge;
   tree new_var, new_var2;
-  bool neg;
+  bool neg = false;
+  int shift = 0;
+  tree nonzero_arg;
 
   /* FIXME: Gimplification of complex type is too hard for now.  */
   /* We aren't prepared to handle vectors either (and it is a question
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
 	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
     return false;
 
-  /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
-     convert it to the conditional.  */
-  if ((integer_zerop (arg0) && integer_onep (arg1))
-      || (integer_zerop (arg1) && integer_onep (arg0)))
-    neg = false;
-  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
-	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
+  /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
+     0 and (1 << cst), then convert it to the conditional.  */
+  if (integer_zerop (arg0))
+    nonzero_arg = arg1;
+  else if (integer_zerop (arg1))
+    nonzero_arg = arg0;
+  else
+    return false;
+  if (integer_all_onesp (nonzero_arg))
     neg = true;
+  else if (integer_pow2p (nonzero_arg))
+    {
+      shift = tree_log2 (nonzero_arg);
+      if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
+	return false;
+    }
   else
     return false;
 
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
      falls through into BB.
 
      There is a single PHI node at the join point (BB) and its arguments
-     are constants (0, 1) or (0, -1).
+     are constants (0, 1) or (0, -1) or (0, (1 << shift)).
 
      So, given the condition COND, and the two PHI arguments, we can
      rewrite this PHI into non-branching code:
 
-       dest = (COND) or dest = COND'
+       dest = (COND) or dest = COND' or dest = (COND) << shift
 
      We use the condition as-is if the argument associated with the
      true edge has the value one or the argument associated with the
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
       cond = fold_build1_loc (gimple_location (stmt),
                               NEGATE_EXPR, TREE_TYPE (cond), cond);
     }
+  else if (shift)
+    {
+      cond = fold_convert_loc (gimple_location (stmt),
+			       TREE_TYPE (result), cond);
+      cond = fold_build2_loc (gimple_location (stmt),
+			      LSHIFT_EXPR, TREE_TYPE (cond), cond,
+			      build_int_cst (integer_type_node, shift));
+    }
 
   /* Insert our new statements at the end of conditional block before the
      COND_STMT.  */
-- 
2.27.0.windows.1