diff options
Diffstat (limited to '0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch')
-rw-r--r-- | 0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch new file mode 100644 index 0000000..db7b4b2 --- /dev/null +++ b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch @@ -0,0 +1,212 @@ +From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001 +From: Roger Sayle <roger@nextmovesoftware.com> +Date: Mon, 2 Aug 2021 13:27:53 +0100 +Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f + +Many thanks again to Jakub Jelinek for a speedy fix for PR 101642. +Interestingly, that test case "bswap16(x) ? : x" also reveals a +missed optimization opportunity. The resulting "x ? bswap(x) : 0" +can be further simplified to just bswap(x). + +Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the +related "x ? popcount(x) : 0", so this patch simply makes that +transformation more general, additionally handling bswap, parity, +ffs and clrsb. All of the required infrastructure is already +present thanks to Jakub previously adding support for clz/ctz. +To reflect this generalization, the name of the function is changed +from cond_removal_in_popcount_clz_ctz_pattern to the hopefully +equally descriptive cond_removal_in_builtin_zero_pattern. + +2021-08-02 Roger Sayle <roger@nextmovesoftware.com> + +gcc/ChangeLog + * tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern): + Renamed from cond_removal_in_popcount_clz_ctz_pattern. + Add support for BSWAP, FFS, PARITY and CLRSB builtins. + (tree_ssa_phiopt_worker): Update call to function above. + +gcc/testsuite/ChangeLog + * gcc.dg/tree-ssa/phi-opt-25.c: New test case. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 37 +++++++--- + 2 files changed, 109 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +new file mode 100644 +index 000000000..c52c92e1d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++unsigned short test_bswap16(unsigned short x) ++{ ++ return x ? __builtin_bswap16(x) : 0; ++} ++ ++unsigned int test_bswap32(unsigned int x) ++{ ++ return x ? __builtin_bswap32(x) : 0; ++} ++ ++unsigned long long test_bswap64(unsigned long long x) ++{ ++ return x ? __builtin_bswap64(x) : 0; ++} ++ ++int test_clrsb(int x) ++{ ++ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1); ++} ++ ++int test_clrsbl(long x) ++{ ++ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1); ++} ++ ++int test_clrsbll(long long x) ++{ ++ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1); ++} ++ ++#if 0 ++/* BUILT_IN_FFS is transformed by match.pd */ ++int test_ffs(unsigned int x) ++{ ++ return x ? __builtin_ffs(x) : 0; ++} ++ ++int test_ffsl(unsigned long x) ++{ ++ return x ? __builtin_ffsl(x) : 0; ++} ++ ++int test_ffsll(unsigned long long x) ++{ ++ return x ? __builtin_ffsll(x) : 0; ++} ++#endif ++ ++int test_parity(int x) ++{ ++ return x ? __builtin_parity(x) : 0; ++} ++ ++int test_parityl(long x) ++{ ++ return x ? __builtin_parityl(x) : 0; ++} ++ ++int test_parityll(long long x) ++{ ++ return x ? __builtin_parityll(x) : 0; ++} ++ ++int test_popcount(int x) ++{ ++ return x ? __builtin_popcount(x) : 0; ++} ++ ++int test_popcountl(long x) ++{ ++ return x ? __builtin_popcountl(x) : 0; ++} ++ ++int test_popcountll(long long x) ++{ ++ return x ? 
__builtin_popcountll(x) : 0; ++} ++ ++/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */ ++ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 045a7b1b8..21ac08145 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); +-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +- edge, edge, gphi *, +- tree, tree); ++static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block, ++ edge, edge, gphi *, ++ tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set<tree> *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); +@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, +- e2, phi, arg0, +- arg1)) ++ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, ++ phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* Convert ++/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0). ++ Convert + + <bb 2> + if (b_4(D) != 0) +@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + instead of 0 above it uses the value from that macro. 
*/ + + static bool +-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, +- basic_block middle_bb, +- edge e1, edge e2, gphi *phi, +- tree arg0, tree arg1) ++cond_removal_in_builtin_zero_pattern (basic_block cond_bb, ++ basic_block middle_bb, ++ edge e1, edge e2, gphi *phi, ++ tree arg0, tree arg1) + { + gimple *cond; + gimple_stmt_iterator gsi, gsi_from; +@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + int val = 0; + switch (cfn) + { ++ case CFN_BUILT_IN_BSWAP16: ++ case CFN_BUILT_IN_BSWAP32: ++ case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: ++ CASE_CFN_FFS: ++ CASE_CFN_PARITY: + CASE_CFN_POPCOUNT: + break; + CASE_CFN_CLZ: +@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + } + } + return false; ++ case BUILT_IN_CLRSB: ++ val = TYPE_PRECISION (integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBL: ++ val = TYPE_PRECISION (long_integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBLL: ++ val = TYPE_PRECISION (long_long_integer_type_node) - 1; ++ break; + default: + return false; + } +-- +2.27.0.windows.1 + |