diff options
Diffstat (limited to '0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch')
-rw-r--r-- | 0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch b/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch new file mode 100644 index 0000000..e8a58aa --- /dev/null +++ b/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch @@ -0,0 +1,105 @@ +From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Fri, 11 Nov 2022 11:30:37 +0800 +Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh + + Merge the high part of series instructions into umulh + + gcc/ + * match.pd: Add simplifications for high part of umulh + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4 +--- + gcc/match.pd | 56 ++++++++++++++++++++++++++ + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index f6c5befd7..433682afb 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP4 to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and a few other passes a chance to reduce what is going on. */ ++/* Mul64 is defined as a multiplication algorithm which multiplies two 64-bit ++ integers into one 128-bit integer. Try to match the high part of mul pattern ++ after the low part of mul pattern is simplified. 
The following scenario ++ should be matched: ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2 ++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3 ++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2 ++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3 ++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6 ++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult @4 @7) @8 ++ AddH = (Addc >> 32) + In0Hi * In1Hi; -- plus@11 (rshift @9 @3) (mult @5 @7) ++ addc32 = Addc << 32; -- lshift@10 @9 @3 ++ ResLo = In0(D) * In1(D); -- mult @0 @1 ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH; ++ } */ ++(simplify ++ (plus:c ++ (plus:c ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult:c @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) ++ @3)) ++ (plus:c@11 ++ (rshift ++ (plus:c@9 ++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) ++ @3) ++ (mult:c (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)) ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) ++ (convert:i128_type @1)) ++ { shift; }))) ++ ) ++) ++#endif ++ + #if GIMPLE + /* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index 2a3b74604..f61cf5e6f 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1, + retHi = m11; + } + +-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */ +-- +2.27.0.windows.1 + |