diff options
author | CoprDistGit <infra@openeuler.org> | 2023-10-17 02:15:03 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-10-17 02:15:03 +0000 |
commit | cc47ed6ddebfece0584ad7ee706549614d16c0f0 (patch) | |
tree | 973a28470803b27c914f813f43d43f8932763ea3 /0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch | |
parent | 1e2198a988cc8d2ea55ab6ca2a1835e60149ab5c (diff) |
automatic import of gccopeneuler22.03_LTS_SP2
Diffstat (limited to '0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch')
-rw-r--r-- | 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch new file mode 100644 index 0000000..35b773e --- /dev/null +++ b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch @@ -0,0 +1,218 @@ +From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Tue, 5 Jan 2021 16:35:22 +0100 +Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) + ^ y [PR96928] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146 + +As requested in the PR, the one's complement abs can be done more +efficiently without cmov or branching. + +Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize +it in ifcvt, on x86_64 with -m32 we generate in the end the exact same +code, but with -m64: + movl %edi, %eax +- notl %eax +- cmpl %edi, %eax +- cmovl %edi, %eax ++ sarl $31, %eax ++ xorl %edi, %eax + ret + +2021-01-05 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/96928 + * tree-ssa-phiopt.c (xor_replacement): New function. + (tree_ssa_phiopt_worker): Call it. + + * gcc.dg/tree-ssa/pr96928.c: New test. + * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, + instead of scanning rtl dump for ifcvt message check assembly + for xor instruction. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++ + gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++ + 2 files changed, 146 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +new file mode 100644 +index 000000000..209135726 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +@@ -0,0 +1,38 @@ ++/* PR tree-optimization/96928 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ ++ ++int ++foo (int a) ++{ ++ return a < 0 ? ~a : a; ++} ++ ++int ++bar (int a, int b) ++{ ++ return a < 0 ? ~b : b; ++} ++ ++unsigned ++baz (int a, unsigned int b) ++{ ++ return a < 0 ? ~b : b; ++} ++ ++unsigned ++qux (int a, unsigned int c) ++{ ++ return a >= 0 ? ~c : c; ++} ++ ++int ++corge (int a, int b) ++{ ++ return a >= 0 ? 
b : ~b; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 707a5882e..b9cd07a60 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); ++static bool xor_replacement (basic_block, basic_block, ++ edge, edge, gimple *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, + edge, edge, gimple *, + tree, tree); +@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; ++ else if (!early_p ++ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ cfgchanged = true; + else if (!early_p + && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, + e2, phi, arg0, +@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ ++ ++static bool ++xor_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0 ATTRIBUTE_UNUSED, edge e1, ++ gimple *phi, tree arg0, tree arg1) ++{ ++ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) ++ return false; ++ ++ /* OTHER_BLOCK must have only one executable statement which must have the ++ form arg0 = ~arg1 or arg1 = ~arg0. */ ++ ++ gimple *assign = last_and_only_stmt (middle_bb); ++ /* If we did not find the proper one's complement assignment, then we cannot ++ optimize. */ ++ if (assign == NULL) ++ return false; ++ ++ /* If we got here, then we have found the only executable statement ++ in OTHER_BLOCK. If it is anything other than arg0 = ~arg1 or ++ arg1 = ~arg0, then we cannot optimize. 
*/ ++ if (!is_gimple_assign (assign)) ++ return false; ++ ++ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (assign); ++ tree rhs = gimple_assign_rhs1 (assign); ++ ++ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */ ++ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) ++ return false; ++ ++ gimple *cond = last_stmt (cond_bb); ++ tree result = PHI_RESULT (phi); ++ ++ /* Only relationals comparing arg[01] against zero are interesting. */ ++ enum tree_code cond_code = gimple_cond_code (cond); ++ if (cond_code != LT_EXPR && cond_code != GE_EXPR) ++ return false; ++ ++ /* Make sure the conditional is x OP 0. */ ++ tree clhs = gimple_cond_lhs (cond); ++ if (TREE_CODE (clhs) != SSA_NAME ++ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) ++ || TYPE_UNSIGNED (TREE_TYPE (clhs)) ++ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) ++ || !integer_zerop (gimple_cond_rhs (cond))) ++ return false; ++ ++ /* We need to know which is the true edge and which is the false ++ edge so that we know if we have xor or inverted xor. */ ++ edge true_edge, false_edge; ++ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); ++ ++ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we ++ will need to invert the result. Similarly for LT_EXPR if ++ the false edge goes to OTHER_BLOCK. 
*/ ++ edge e; ++ if (cond_code == GE_EXPR) ++ e = true_edge; ++ else ++ e = false_edge; ++ ++ bool invert = e->dest == middle_bb; ++ ++ result = duplicate_ssa_name (result, NULL); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); ++ ++ int prec = TYPE_PRECISION (TREE_TYPE (clhs)); ++ gimple *new_stmt ++ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, ++ build_int_cst (integer_type_node, prec - 1)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ ++ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ NOP_EXPR, gimple_assign_lhs (new_stmt)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ } ++ lhs = gimple_assign_lhs (new_stmt); ++ ++ if (invert) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ BIT_NOT_EXPR, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ rhs = gimple_assign_lhs (new_stmt); ++ } ++ ++ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ replace_phi_edge_with_variable (cond_bb, e1, phi, result); ++ ++ /* Note that we optimized this PHI. */ ++ return true; ++} ++ + /* Auxiliary functions to determine the set of memory accesses which + can't trap because they are preceded by accesses to the same memory + portion. We do that for MEM_REFs, so we only need to track +-- +2.27.0.windows.1 + |