summaryrefslogtreecommitdiff
path: root/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
diff options
context:
space:
mode:
Diffstat (limited to '0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch')
-rw-r--r--0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch105
1 files changed, 105 insertions, 0 deletions
diff --git a/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch b/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
new file mode 100644
index 0000000..e8a58aa
--- /dev/null
+++ b/0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
@@ -0,0 +1,105 @@
+From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001
+From: zhongyunde <zhongyunde@huawei.com>
+Date: Fri, 11 Nov 2022 11:30:37 +0800
+Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh
+
+ Merge the high part of series instructions into umulh
+
+ gcc/
+ * match.pd: Add simplifications for high part of umulh
+
+ gcc/testsuite/
+ * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
+---
+ gcc/match.pd | 56 ++++++++++++++++++++++++++
+ gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
+ 2 files changed, 59 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index f6c5befd7..433682afb 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+
++#if GIMPLE
++/* These patterns are mostly used by FORWPROP4 to move some operations outside of
++ the if statements. They should be done late because that gives jump threading
++ and a few other passes a chance to reduce what is going on. */
++/* Mul64 is defined as a multiplication algorithm which multiplies two 64-bit
++ integers into one 128-bit integer. Try to match the high part of the mul
++ pattern after the low part of the mul pattern is simplified. The following
++ scenario should be matched:
++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult @4 @7) @8
++ AddH = (Addc >> 32) + In0Hi * In1Hi; -- (plus@11 (rshift @9 @3) (mult @5 @7))
++ addc32 = Addc << 32; -- lshift@10 @9 @3
++ ResLo = In0(D) * In1(D); -- mult @0 @1
++ ResHi = ((long unsigned int) (addc32 > ResLo)) +
++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
++ } */
++(simplify
++ (plus:c
++ (plus:c
++ (convert
++ (gt (lshift@10 @9 @3)
++ (mult:c @0 @1)))
++ (lshift
++ (convert
++ (gt @8 @9))
++ @3))
++ (plus:c@11
++ (rshift
++ (plus:c@9
++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
++ @3)
++ (mult:c (rshift@5 SSA_NAME@0 @3)
++ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
++ )
++ )
++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
++ && TYPE_PRECISION (type) == 64)
++ (with {
++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
++ tree shift = build_int_cst (integer_type_node, 64);
++ }
++ (convert:type (rshift
++ (mult (convert:i128_type @0)
++ (convert:i128_type @1))
++ { shift; })))
++ )
++)
++#endif
++
+ #if GIMPLE
+ /* These patterns are mostly used by FORWPROP1 to fold some operations into more
+ simple IR. The following scenario should be matched:
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+index 2a3b74604..f61cf5e6f 100644
+--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
+
+ # define BN_BITS4 32
+ # define BN_MASK2 (0xffffffffffffffffL)
+@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
+ retHi = m11;
+ }
+
+-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
+--
+2.27.0.windows.1
+