1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Wed, 9 Nov 2022 17:04:13 +0800
Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
Merge a series of instructions that computes the low part of a multiplication into a single mul.
gcc/
* match.pd: Add simplifications for low part of mul
* common.opt (fmerge-mull): New option, enabled at -O2 and above.
* opts.c (default_options_table): Enable -fmerge-mull at -O2 and above.
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: New test.
---
gcc/common.opt | 4 +++
gcc/match.pd | 27 ++++++++++++++++++++
gcc/opts.c | 1 +
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
diff --git a/gcc/common.opt b/gcc/common.opt
index ad147f7a9..6a7f66624 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2069,6 +2069,10 @@ fmerge-debug-strings
Common Report Var(flag_merge_debug_strings) Init(1)
Attempt to merge identical debug strings across compilation units.
+fmerge-mull
+Common Report Var(flag_merge_mull) Init(0) Optimization
+Attempt to merge series instructions into mul.
+
fmessage-length=
Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
diff --git a/gcc/match.pd b/gcc/match.pd
index 5c5b5f89e..f6c5befd7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* Fold the half-word computation of the low 64 bits of a 64x64 multiply
+   (as seen by FORWPROP1) into a single multiplication:
+     In0Lo = In0(D) & 4294967295;
+     In0Hi = In0(D) >> 32;
+     In1Lo = In1(D) & 4294967295;
+     In1Hi = In1(D) >> 32;
+     Addc = In0Lo * In1Hi + In0Hi * In1Lo;
+     addc32 = Addc << 32;
+     ResLo = In0Lo * In1Lo + addc32
+   This equals In0 * In1 only modulo 2^64, and only when the mask @2 is
+   0xffffffff and the shift count @3 is 32, so check both constants and
+   require TYPE to wrap on overflow.  */
+(simplify
+ (plus:c (mult @4 @5)
+  (lshift
+   (plus:c
+    (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
+    (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
+   INTEGER_CST@3))
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+      && TYPE_PRECISION (type) == 64 && compare_tree_int (@3, 32) == 0
+      && compare_tree_int (@2, 0xffffffff) == 0)
+  (mult (convert:type @0) (convert:type @1))))
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/opts.c b/gcc/opts.c
index f12b13599..751965e46 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
/* -O2 and above optimizations, but not -Os or -Og. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
new file mode 100644
index 000000000..2a3b74604
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+
+# define BN_BITS4 32 /* shift count used by HBITS and L2HBITS */
+# define BN_MASK2 (0xffffffffffffffffL) /* all 64 bits set */
+# define BN_MASK2l (0xffffffffL) /* low 32 bits set */
+# define BN_MASK2h (0xffffffff00000000L) /* bits 32..63 set; not used by mul64 in this test */
+# define BN_MASK2h1 (0xffffffff80000000L) /* bits 31..63 set; not used by mul64 in this test */
+# define LBITS(a) ((a)&BN_MASK2l) /* low 32 bits of a */
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) /* a shifted right by 32, masked to 32 bits */
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) /* a shifted left by 32, masked with BN_MASK2 */
+
+void mul64(unsigned long in0, unsigned long in1, /* operands; assumes unsigned long is 64 bits wide -- TODO confirm for all test targets */
+ unsigned long &retLo, unsigned long &retHi) { /* outputs: receive low and m11 respectively */
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh; /* partial products and operand halves */
+ unsigned long Addc, addc32, low;
+ al = LBITS(in0); /* low 32 bits of in0 */
+ ah = HBITS(in0); /* in0 >> 32, masked to 32 bits */
+ bl = LBITS(in1); /* low 32 bits of in1 */
+ bh = HBITS(in1); /* in1 >> 32, masked to 32 bits */
+ m10 = bh * al; /* cross product */
+ m00 = bl * al; /* low * low */
+ m01 = bl * ah; /* cross product */
+ m11 = bh * ah; /* high * high */
+ Addc = (m10 + m01) & BN_MASK2; /* sum of the cross products; unsigned addition may wrap */
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1); /* sum wrapped: add 1 << 32 to the high accumulator */
+ m11 += HBITS(Addc); /* upper half of the cross sum goes to the high accumulator */
+ addc32 = L2HBITS(Addc); /* cross sum shifted left by 32 */
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; /* low word plus carry; presumably the statement the dg-final scan expects forwprop1 to simplify */
+ retLo = low;
+ retHi = m11;
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
--
2.27.0.windows.1
|