diff options
Diffstat (limited to '0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch')
-rw-r--r-- | 0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch b/0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch new file mode 100644 index 0000000..efdd318 --- /dev/null +++ b/0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch @@ -0,0 +1,130 @@ +From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: [PATCH 07/22] [MULL64 2/3] Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifcations for low part of mul + * common.opt: Add new option fmerge-mull enable with -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test. +--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.cc | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc52..e365a48bc 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2126,6 +2126,10 @@ fmerge-debug-strings + Common Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping. +diff --git a/gcc/match.pd b/gcc/match.pd +index fd0857fc9..2092e6959 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into more ++ simple IR. The following scenario should be matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.cc b/gcc/opts.cc +index a97630d1c..eae71ed20 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -647,6 +647,7 @@ static const struct default_options default_options_table[] = + VECT_COST_MODEL_VERY_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. */ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) ++ ++void mul64(unsigned long in0, unsigned long in1, ++ unsigned long &retLo, unsigned long &retHi) { ++ unsigned long m00, m01, m10, m11, al, ah, bl, bh; ++ unsigned long Addc, addc32, low; ++ al = LBITS(in0); ++ ah = HBITS(in0); ++ bl = LBITS(in1); ++ bh = HBITS(in1); ++ m10 = bh * al; ++ m00 = bl * al; ++ m01 = bl * ah; ++ m11 = bh * ah; ++ Addc = (m10 + m01) & BN_MASK2; ++ if (Addc < m01) m11 += L2HBITS((unsigned long)1); ++ m11 += HBITS(Addc); ++ addc32 = L2HBITS(Addc); ++ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; ++ retLo = low; ++ retHi = m11; ++} ++ ++/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ +-- +2.33.0 + |