summaryrefslogtreecommitdiff
path: root/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
diff options
context:
space:
mode:
Diffstat (limited to '0074-FORWPROP-Fold-series-of-instructions-into-mul.patch')
-rw-r--r--0074-FORWPROP-Fold-series-of-instructions-into-mul.patch130
1 files changed, 130 insertions, 0 deletions
diff --git a/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch b/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
new file mode 100644
index 0000000..d864621
--- /dev/null
+++ b/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
@@ -0,0 +1,130 @@
+From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
+From: zhongyunde <zhongyunde@huawei.com>
+Date: Wed, 9 Nov 2022 17:04:13 +0800
+Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
+
+ Merge a series of instructions that compute the low part of a product into a single mul
+
+ gcc/
+ * match.pd: Add simplifications for the low part of mul.
+ * common.opt: Add new option -fmerge-mull, enabled at -O2 and above.
+ * opts.c (default_options_table): Enable -fmerge-mull at -O2 and above.
+
+ gcc/testsuite/
+ * g++.dg/tree-ssa/mull64.C: New test.
+---
+ gcc/common.opt | 4 +++
+ gcc/match.pd | 27 ++++++++++++++++++++
+ gcc/opts.c | 1 +
+ gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
+ 4 files changed, 66 insertions(+)
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index ad147f7a9..6a7f66624 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2069,6 +2069,10 @@ fmerge-debug-strings
+ Common Report Var(flag_merge_debug_strings) Init(1)
+ Attempt to merge identical debug strings across compilation units.
+
++fmerge-mull
++Common Report Var(flag_merge_mull) Init(0) Optimization
++Attempt to merge series instructions into mul.
++
+ fmessage-length=
+ Common RejectNegative Joined UInteger
+ -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 5c5b5f89e..f6c5befd7 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+
++#if GIMPLE
++/* Fold the schoolbook computation of the low 64 bits of a 64x64 multiply
++   into a single MULT_EXPR.  Mostly used by FORWPROP1.  The following
++   scenario should be matched:
++     In0Lo = In0(D) & 4294967295;  In0Hi = In0(D) >> 32;
++     In1Lo = In1(D) & 4294967295;  In1Hi = In1(D) >> 32;
++     Addc = In0Lo * In1Hi + In0Hi * In1Lo;
++     addc32 = Addc << 32;
++     ResLo = In0Lo * In1Lo + addc32
++   The identity only holds for mask 0xffffffff and shift 32, and the new
++   multiply may wrap where the original did not, so check all of that.  */
++(simplify
++ (plus:c (mult @4 @5)
++  (lshift
++   (plus:c
++    (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
++    (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
++   INTEGER_CST@3))
++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
++      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
++      && TYPE_PRECISION (type) == 64 && TYPE_OVERFLOW_WRAPS (type)
++      && compare_tree_int (@2, 0xffffffff) == 0
++      && compare_tree_int (@3, 32) == 0)
++  (mult (convert:type @0) (convert:type @1)))
++)
++#endif
++
+ /* Simplification moved from fold_cond_expr_with_comparison. It may also
+ be extended. */
+ /* This pattern implements two kinds simplification:
+diff --git a/gcc/opts.c b/gcc/opts.c
+index f12b13599..751965e46 100644
+--- a/gcc/opts.c
++++ b/gcc/opts.c
+@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
+ { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
+ { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
+
+ /* -O2 and above optimizations, but not -Os or -Og. */
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+new file mode 100644
+index 000000000..2a3b74604
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+@@ -0,0 +1,34 @@
++/* { dg-do compile { target lp64 } } */
++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
++
++# define BN_BITS4 32
++# define BN_MASK2 (0xffffffffffffffffL)
++# define BN_MASK2l (0xffffffffL)
++# define BN_MASK2h (0xffffffff00000000L)
++# define BN_MASK2h1 (0xffffffff80000000L)
++# define LBITS(a) ((a)&BN_MASK2l)
++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
++# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
++/* Multiply in0 by in1 from 32-bit halves; the scan below expects `low' to be simplified.  */
++void mul64(unsigned long in0, unsigned long in1,
++           unsigned long &retLo, unsigned long &retHi) {
++  unsigned long m00, m01, m10, m11, al, ah, bl, bh;
++  unsigned long Addc, addc32, low;
++  al = LBITS(in0);  /* low 32 bits of in0 */
++  ah = HBITS(in0);  /* in0 shifted right by 32 */
++  bl = LBITS(in1);  /* low 32 bits of in1 */
++  bh = HBITS(in1);  /* in1 shifted right by 32 */
++  m10 = bh * al;  /* cross product */
++  m00 = bl * al;  /* low * low */
++  m01 = bl * ah;  /* cross product */
++  m11 = bh * ah;  /* high * high */
++  Addc = (m10 + m01) & BN_MASK2;  /* sum of the cross products, may wrap */
++  if (Addc < m01) m11 += L2HBITS((unsigned long)1);  /* the sum wrapped: carry into m11 */
++  m11 += HBITS(Addc);
++  addc32 = L2HBITS(Addc);  /* cross sum shifted left by 32 */
++  low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;  /* carry out of the low word */
++  retLo = low;
++  retHi = m11;
++}
++
++/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+--
+2.27.0.windows.1
+