diff options
Diffstat (limited to '0074-FORWPROP-Fold-series-of-instructions-into-mul.patch')
-rw-r--r-- | 0074-FORWPROP-Fold-series-of-instructions-into-mul.patch | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch b/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch new file mode 100644 index 0000000..d864621 --- /dev/null +++ b/0074-FORWPROP-Fold-series-of-instructions-into-mul.patch @@ -0,0 +1,130 @@ +From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifications for low part of mul + * common.opt: Add new option fmerge-mull, enabled at -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test. +--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.c | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index ad147f7a9..6a7f66624 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2069,6 +2069,10 @@ fmerge-debug-strings + Common Report Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Report Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping. +diff --git a/gcc/match.pd b/gcc/match.pd +index 5c5b5f89e..f6c5befd7 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into ++ simpler IR. 
The following scenario (mask 0xffffffff, shift 32) is matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) == 64 ++ && types_match (@0, @1) && compare_tree_int (@2, 0xffffffff) == 0 ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && compare_tree_int (@3, 32) == 0) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.c b/gcc/opts.c +index f12b13599..751965e46 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] = + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* The fold needs a 64-bit unsigned long, hence the lp64 selector above. */ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) ++ ++void mul64(unsigned long in0, unsigned long in1, ++ unsigned long &retLo, unsigned long &retHi) { ++ unsigned long m00, m01, m10, m11, al, ah, bl, bh; ++ unsigned long Addc, addc32, low; ++ al = LBITS(in0); ++ ah = HBITS(in0); ++ bl = LBITS(in1); ++ bh = HBITS(in1); ++ m10 = bh * al; ++ m00 = bl * al; ++ m01 = bl * ah; ++ m11 = bh * ah; ++ Addc = (m10 + m01) & BN_MASK2; ++ if (Addc < m01) m11 += L2HBITS((unsigned long)1); ++ m11 += HBITS(Addc); ++ addc32 = L2HBITS(Addc); ++ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; ++ retLo = low; ++ retHi = m11; ++} ++ ++/* { dg-final { scan-tree-dump "gimple_simplified to low_\[0-9\]+ = in0_\[0-9\]+" "forwprop1" } } */ +-- +2.27.0.windows.1 + |