path: root/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
author CoprDistGit <infra@openeuler.org> 2023-10-17 02:15:03 +0000
committer CoprDistGit <infra@openeuler.org> 2023-10-17 02:15:03 +0000
commit cc47ed6ddebfece0584ad7ee706549614d16c0f0 (patch)
tree 973a28470803b27c914f813f43d43f8932763ea3 /0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
parent 1e2198a988cc8d2ea55ab6ca2a1835e60149ab5c (diff)
automatic import of gcc (openeuler22.03_LTS_SP2)
Diffstat (limited to '0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch')
-rw-r--r-- 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch | 218
1 file changed, 218 insertions, 0 deletions
diff --git a/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
new file mode 100644
index 0000000..35b773e
--- /dev/null
+++ b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
@@ -0,0 +1,218 @@
+From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Tue, 5 Jan 2021 16:35:22 +0100
+Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
+ ^ y [PR96928]
+
+Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
+
+As requested in the PR, the one's complement abs can be done more
+efficiently without cmov or branching.
+
+Had to change the ifcvt-onecmpl-abs-1.c testcase: we no longer optimize
+it in ifcvt. On x86_64 with -m32 we generate the exact same code in the
+end, but with -m64:
+ movl %edi, %eax
+- notl %eax
+- cmpl %edi, %eax
+- cmovl %edi, %eax
++ sarl $31, %eax
++ xorl %edi, %eax
+ ret
+
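To make the transformation concrete, a minimal C sketch of the before/after forms this patch targets (assuming a 32-bit int and GCC's arithmetic right shift of negative signed values; the function names are illustrative, not part of the patch):

/* One's complement abs written with a conditional; phiopt now rewrites
   this into the branch-free form below.  */
int ones_abs_cond (int x)
{
  return x < 0 ? ~x : x;
}

/* x >> 31 is all-ones when x < 0 and zero otherwise, so the xor
   complements x exactly when it is negative.  */
int ones_abs_xor (int x)
{
  return (x >> 31) ^ x;
}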
+2021-01-05 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/96928
+ * tree-ssa-phiopt.c (xor_replacement): New function.
+ (tree_ssa_phiopt_worker): Call it.
+
+ * gcc.dg/tree-ssa/pr96928.c: New test.
+ * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1;
+ instead of scanning the rtl dump for the ifcvt message, check the
+ assembly for an xor instruction.
+---
+ gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
+ gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
+ 2 files changed, 146 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
+
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
+new file mode 100644
+index 000000000..209135726
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
+@@ -0,0 +1,38 @@
++/* PR tree-optimization/96928 */
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-phiopt2" } */
++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
++
++int
++foo (int a)
++{
++ return a < 0 ? ~a : a;
++}
++
++int
++bar (int a, int b)
++{
++ return a < 0 ? ~b : b;
++}
++
++unsigned
++baz (int a, unsigned int b)
++{
++ return a < 0 ? ~b : b;
++}
++
++unsigned
++qux (int a, unsigned int c)
++{
++ return a >= 0 ? ~c : c;
++}
++
++int
++corge (int a, int b)
++{
++ return a >= 0 ? b : ~b;
++}
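For reference, a hedged sketch of the branch-free forms the phiopt2 dump scans above look for, written as plain C for two of the testcase functions (names mirror the testcase; the real dump contains GIMPLE with SSA temporaries rather than these helpers):

/* bar: a < 0 ? ~b : b becomes a sign-bit mask xored with b.  */
int bar_expected (int a, int b)
{
  return (a >> 31) ^ b;
}

/* qux: the condition is inverted (a >= 0), so the complement of c
   survives as the single BIT_NOT the "~c" scan counts.  */
unsigned qux_expected (int a, unsigned int c)
{
  return (a >> 31) ^ ~c;
}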
+diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
+index 707a5882e..b9cd07a60 100644
+--- a/gcc/tree-ssa-phiopt.c
++++ b/gcc/tree-ssa-phiopt.c
+@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
+ edge, edge, gimple *, tree, tree);
+ static bool abs_replacement (basic_block, basic_block,
+ edge, edge, gimple *, tree, tree);
++static bool xor_replacement (basic_block, basic_block,
++ edge, edge, gimple *, tree, tree);
+ static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+ edge, edge, gimple *,
+ tree, tree);
+@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
+ cfgchanged = true;
+ else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
++ else if (!early_p
++ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
++ cfgchanged = true;
+ else if (!early_p
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+ e2, phi, arg0,
+@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
+ return true;
+ }
+
++/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
++
++static bool
++xor_replacement (basic_block cond_bb, basic_block middle_bb,
++ edge e0 ATTRIBUTE_UNUSED, edge e1,
++ gimple *phi, tree arg0, tree arg1)
++{
++ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
++ return false;
++
++ /* OTHER_BLOCK must have only one executable statement which must have the
++ form arg0 = ~arg1 or arg1 = ~arg0. */
++
++ gimple *assign = last_and_only_stmt (middle_bb);
++ /* If we did not find the proper one's complement assignment, then we cannot
++ optimize. */
++ if (assign == NULL)
++ return false;
++
++ /* If we got here, then we have found the only executable statement
++ in OTHER_BLOCK. If it is anything other than arg0 = ~arg1 or
++ arg1 = ~arg0, then we cannot optimize. */
++ if (!is_gimple_assign (assign))
++ return false;
++
++ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
++ return false;
++
++ tree lhs = gimple_assign_lhs (assign);
++ tree rhs = gimple_assign_rhs1 (assign);
++
++ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */
++ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
++ return false;
++
++ gimple *cond = last_stmt (cond_bb);
++ tree result = PHI_RESULT (phi);
++
++ /* Only relationals comparing arg[01] against zero are interesting. */
++ enum tree_code cond_code = gimple_cond_code (cond);
++ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
++ return false;
++
++ /* Make sure the conditional is x OP 0. */
++ tree clhs = gimple_cond_lhs (cond);
++ if (TREE_CODE (clhs) != SSA_NAME
++ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
++ || TYPE_UNSIGNED (TREE_TYPE (clhs))
++ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
++ || !integer_zerop (gimple_cond_rhs (cond)))
++ return false;
++
++ /* We need to know which is the true edge and which is the false
++ edge so that we know if we have xor or inverted xor. */
++ edge true_edge, false_edge;
++ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
++
++ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
++ will need to invert the result. Similarly for LT_EXPR if
++ the false edge goes to OTHER_BLOCK. */
++ edge e;
++ if (cond_code == GE_EXPR)
++ e = true_edge;
++ else
++ e = false_edge;
++
++ bool invert = e->dest == middle_bb;
++
++ result = duplicate_ssa_name (result, NULL);
++
++ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
++
++ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
++ gimple *new_stmt
++ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
++ build_int_cst (integer_type_node, prec - 1));
++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
++
++ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
++ {
++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
++ NOP_EXPR, gimple_assign_lhs (new_stmt));
++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
++ }
++ lhs = gimple_assign_lhs (new_stmt);
++
++ if (invert)
++ {
++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
++ BIT_NOT_EXPR, rhs);
++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
++ rhs = gimple_assign_lhs (new_stmt);
++ }
++
++ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
++ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
++
++ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
++
++ /* Note that we optimized this PHI. */
++ return true;
++}
++
+ /* Auxiliary functions to determine the set of memory accesses which
+ can't trap because they are preceded by accesses to the same memory
+ portion. We do that for MEM_REFs, so we only need to track
+--
+2.27.0.windows.1
+
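As an aside on the TYPE_UNSIGNED and TYPE_PRECISION checks in xor_replacement above: the rewrite relies on an arithmetic right shift of the signed condition operand producing an all-zeros or all-ones mask. A small illustrative C sketch (assumptions: 32-bit operands and GCC's implementation-defined arithmetic shift of negative values):

/* Signed operand: the shift replicates the sign bit, giving 0 or -1
   (all ones), which is exactly the mask the xor needs.  */
int sign_mask (int x)
{
  return x >> 31;
}

/* Unsigned operand: the shift is logical, so the result is 0 or 1;
   xoring with it would only flip the low bit, not complement the value.  */
unsigned wrong_mask (unsigned x)
{
  return x >> 31;
}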