summaryrefslogtreecommitdiff
path: root/0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
blob: cac0b3947d8cdcec182185340d8fdeecff3e19d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
From 4e536dbb4a08925cea259be13962969efcc0f3c1 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 11 Nov 2022 11:30:37 +0800
Subject: [PATCH 08/22] [MULL64 3/3] Fold series of instructions into umulh

    Fold the series of instructions that computes the high part into umulh

    gcc/
        * match.pd: Add simplifications for high part of umulh

    gcc/testsuite/
        * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
---
 gcc/match.pd                           | 56 ++++++++++++++++++++++++++
 gcc/testsuite/g++.dg/tree-ssa/mull64.C |  5 ++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 2092e6959..b7e3588e8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 )
 #endif
 
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP4 to move some operations outside
+   of the if statements.  They should be done late because that gives jump
+   threading and a few other passes a chance to simplify the code first.  */
+/* Mul64 is defined as a multiplication algorithm which multiplies two 64-bit
+   integers into one 128-bit integer.  Try to match the high part of the mul
+   pattern after the low part of the mul pattern is simplified.  The following
+   scenario should be matched:
+  (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+    In0Lo = In0(D) & 4294967295;        -- bit_and@4 SSA_NAME@0 @2
+    In0Hi = In0(D) >> 32;               -- rshift@5 SSA_NAME@0 @3
+    In1Lo = In1(D) & 4294967295;        -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
+    In1Hi = In1(D) >> 32;               -- rshift@7 SSA_NAME@1 INTEGER_CST@3
+    Mull_01 = In0Hi * In1Lo;            -- mult@8 @5 @6
+    Addc = In0Lo * In1Hi + Mull_01;     -- plus@9 (mult @4 @7) @8
+    AddH = (Addc >> 32) + In0Hi * In1Hi; -- plus@11 (rshift @9 @3) (mult @5 @7)
+    addc32 = Addc << 32;                -- lshift@10 @9 @3
+    ResLo = In0(D) * In1(D);            -- mult @0 @1
+    ResHi = ((long unsigned int) (addc32 > ResLo)) +
+	        (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
+ } */
+(simplify
+ (plus:c
+  (plus:c
+   (convert
+    (gt (lshift@10 @9 @3)
+        (mult:c @0 @1)))
+   (lshift
+    (convert
+     (gt @8 @9))
+    @3))
+  (plus:c@11
+   (rshift
+    (plus:c@9
+     (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
+             (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
+    @3)
+   (mult:c (rshift@5 SSA_NAME@0 @3)
+           (rshift@7 SSA_NAME@1 INTEGER_CST@3))
+  )
+ )
+ /* The carry compares are only valid for an unsigned 64-bit value that is
+    split at bit 32, so require @2 == 0xffffffff and @3 == 32.  */
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+      && TYPE_PRECISION (type) == 64
+      && compare_tree_int (@2, 0xffffffff) == 0
+      && compare_tree_int (@3, 32) == 0)
+  (with { tree i128_type = build_nonstandard_integer_type (128, 1);
+          tree shift = build_int_cst (integer_type_node, 64); }
+   (convert:type (rshift (mult (convert:i128_type @0) (convert:i128_type @1))
+                         { shift; })))
+ )
+)
+#endif
+
 #if GIMPLE
 /* These patterns are mostly used by FORWPROP1 to fold some operations into more
    simple IR. The following scenario should be matched:
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index 2a3b74604..f61cf5e6f 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
 
 #  define BN_BITS4        32
 #  define BN_MASK2        (0xffffffffffffffffL)
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
     retHi  = m11;
 }
 
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
-- 
2.33.0