Diffstat (limited to '0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch')
-rw-r--r--  0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch  184
1 file changed, 184 insertions(+), 0 deletions(-)
diff --git a/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch b/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
new file mode 100644
index 0000000..aab1bb6
--- /dev/null
+++ b/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
@@ -0,0 +1,184 @@
+From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001
+From: Roger Sayle <roger@nextmovesoftware.com>
+Date: Fri, 27 May 2022 08:57:46 +0100
+Subject: [PATCH 145/157] [Backport][SME] Canonicalize X&-Y as X*Y in match.pd
+ when Y is [0,1].
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655
+
+"For every pessimization, there's an equal and opposite optimization".
+
+In the review of my original patch for PR middle-end/98865, Richard
+Biener pointed out that match.pd shouldn't be transforming X*Y into
+X&-Y as the former is considered cheaper by tree-ssa's cost model
+(operator count). A corollary of this is that we should instead be
+transforming X&-Y into the cheaper X*Y as a preferred canonical form
+(especially as RTL expansion now intelligently selects the appropriate
+implementation based on the target's costs).
+
+With this patch we now generate identical code for:
+int foo(int x, int y) { return -(x&1) & y; }
+int bar(int x, int y) { return (x&1) * y; }
+
+specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2,
+but both use and/mul with -Os.
+
+One minor wrinkle/improvement is that this patch includes three
+additional optimizations (that account for the change in canonical
+form) to continue to optimize PR92834 and PR94786.
+
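+As a minimal illustrative sketch (the function names here are hypothetical,
+not taken from either PR), the branchless-select idiom those PRs concern can
+be written in plain C in both the old bit_and shape and the new canonical
+mult shape; the transforms below fold either back into the preferred ?: form:
+
+/* Both compute (a < b) ? b : a; names are illustrative only.  */
+int max_via_and (int a, int b) { return a - ((a - b) & -(a < b)); }
+int max_via_mul (int a, int b) { return a - (a - b) * (a < b); }
+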
+2022-05-27 Roger Sayle <roger@nextmovesoftware.com>
+
+gcc/ChangeLog
+ * match.pd (match_zero_one_valued_p): New predicate.
+ (mult @0 @1): Use zero_one_valued_p for optimization to the
+ expression "bit_and @0 @1".
+ (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR.
+ (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform.
+ (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise.
+ (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise.
+ Remove three redundant transforms obsoleted by the three above.
+
+gcc/testsuite/ChangeLog
+ * gcc.dg/pr98865.c: New test case.
+---
+ gcc/match.pd | 86 ++++++++++++++++------------------
+ gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++
+ 2 files changed, 55 insertions(+), 45 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/pr98865.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index aee58e47b..6d3165bcd 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ || !COMPLEX_FLOAT_TYPE_P (type)))
+ (negate @0)))
+
+-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */
+-(simplify
+- (mult SSA_NAME@1 SSA_NAME@2)
+- (if (INTEGRAL_TYPE_P (type)
+- && get_nonzero_bits (@1) == 1
+- && get_nonzero_bits (@2) == 1)
+- (bit_and @1 @2)))
+-
+ /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+ unless the target has native support for the former but not the latter. */
+ (simplify
+@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ (bit_not (bit_not @0))
+ @0)
+
++(match zero_one_valued_p
++ @0
++ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1)))
++(match zero_one_valued_p
++ truth_valued_p@0)
++
++/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. */
++(simplify
++ (mult zero_one_valued_p@0 zero_one_valued_p@1)
++ (if (INTEGRAL_TYPE_P (type))
++ (bit_and @0 @1)))
++
++/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */
++(simplify
++ (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
++ (if (INTEGRAL_TYPE_P (type)
++ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
++ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE
++ && !TYPE_UNSIGNED (TREE_TYPE (@0)))
++ (mult (convert @0) @1)))
++
+ /* Convert ~ (-A) to A - 1. */
+ (simplify
+ (bit_not (convert? (negate @0)))
+@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ (cmp @0 (minmax:c @0 @1))
+ { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } ))
+
+-/* Undo fancy way of writing max/min or other ?: expressions,
+- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
++/* Undo fancy ways of writing max/min or other ?: expressions, like
++ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a.
+ People normally use ?: and that is what we actually try to optimize. */
+-(for cmp (simple_comparison)
+- (simplify
+- (minus @0 (bit_and:c (minus @0 @1)
+- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+- (if (INTEGRAL_TYPE_P (type)
+- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+- (cond (cmp @2 @3) @1 @0)))
+- (simplify
+- (plus:c @0 (bit_and:c (minus @1 @0)
+- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+- (if (INTEGRAL_TYPE_P (type)
+- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+- (cond (cmp @2 @3) @1 @0)))
+- /* Similarly with ^ instead of - though in that case with :c. */
+- (simplify
+- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1)
+- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+- (if (INTEGRAL_TYPE_P (type)
+- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
+- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+- (cond (cmp @2 @3) @1 @0))))
++/* Transform A + (B-A)*cmp into cmp ? B : A. */
++(simplify
++ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2))
++ (if (INTEGRAL_TYPE_P (type)
++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
++ (cond (convert:boolean_type_node @2) @1 @0)))
++/* Transform A - (A-B)*cmp into cmp ? B : A. */
++(simplify
++ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2))
++ (if (INTEGRAL_TYPE_P (type)
++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
++ (cond (convert:boolean_type_node @2) @1 @0)))
++/* Transform A ^ (A^B)*cmp into cmp ? B : A. */
++(simplify
++ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2))
++ (if (INTEGRAL_TYPE_P (type)
++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
++ (cond (convert:boolean_type_node @2) @1 @0)))
+
+ /* Simplifications of shift and rotates. */
+
+diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c
+new file mode 100644
+index 000000000..95f727033
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/pr98865.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-optimized" } */
++
++int foo(int x, int y)
++{
++ return -(x&1) & y;
++}
++
++int bar(int x, int y)
++{
++ return (x&1) * y;
++}
++
++/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */
+--
+2.33.0
+