diff options
Diffstat (limited to '0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch')
-rw-r--r-- | 0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch b/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch new file mode 100644 index 0000000..9b2be00 --- /dev/null +++ b/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch @@ -0,0 +1,252 @@ +From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 <pronin.alexander@huawei.com> +Date: Tue, 24 Jan 2023 16:43:40 +0300 +Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen + ops too. + +--- + gcc/common.opt | 5 ++ + gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++ + gcc/tree-ssa-math-opts.cc | 43 ++++++++-- + 3 files changed, 184 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index dac477c04..39c90604e 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3106,6 +3106,11 @@ freciprocal-math + Common Var(flag_reciprocal_math) SetByCombined Optimization + Same as -fassociative-math for expressions which include division. + ++fuaddsub-overflow-match-all ++Common Var(flag_uaddsub_overflow_match_all) ++Match unsigned add/sub overflow even if the target does not support ++the corresponding instruction. ++ + ; Nonzero means that unsafe floating-point math optimizations are allowed + ; for the sake of speed. IEEE compliance is not guaranteed, and operations + ; are allowed to assume that their arguments and results are "normal" +diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c +new file mode 100644 +index 000000000..96c26d308 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/uaddsub.c +@@ -0,0 +1,143 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */ ++#include <stdint.h> ++ ++typedef unsigned __int128 uint128_t; ++typedef struct uint256_t ++{ ++ uint128_t lo; ++ uint128_t hi; ++} uint256_t; ++ ++uint16_t add16 (uint8_t a, uint8_t b) ++{ ++ uint8_t tmp = a + b; ++ uint8_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint16_t res = overflow; ++ res <<= 8; ++ res += tmp; ++ return res; ++} ++ ++uint32_t add32 (uint16_t a, uint16_t b) ++{ ++ uint16_t tmp = a + b; ++ uint16_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint32_t res = overflow; ++ res <<= 16; ++ res += tmp; ++ return res; ++} ++ ++uint64_t add64 (uint32_t a, uint32_t b) ++{ ++ uint32_t tmp = a + b; ++ uint32_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint64_t res = overflow; ++ res <<= 32; ++ res += tmp; ++ return res; ++} ++ ++uint128_t add128 (uint64_t a, uint64_t b) ++{ ++ uint64_t tmp = a + b; ++ uint64_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint128_t res = overflow; ++ res <<= 64; ++ res += tmp; ++ return res; ++} ++ ++uint256_t add256 (uint128_t a, uint128_t b) ++{ ++ uint128_t tmp = a + b; ++ uint128_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint256_t res; ++ res.hi = overflow; ++ res.lo = tmp; ++ return res; ++} ++ ++uint16_t sub16 (uint8_t a, uint8_t b) ++{ ++ uint8_t tmp = a - b; ++ uint8_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint16_t res = overflow; ++ res <<= 8; ++ res += tmp; ++ return res; ++} ++ ++uint32_t sub32 (uint16_t a, uint16_t b) ++{ ++ uint16_t tmp = a - b; ++ uint16_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint32_t res = overflow; ++ res <<= 16; ++ res += tmp; ++ return res; ++} ++ ++uint64_t sub64 (uint32_t a, uint32_t b) ++{ ++ uint32_t tmp = a - b; ++ uint32_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint64_t res = overflow; ++ res <<= 32; ++ res += tmp; ++ return res; ++} ++ ++uint128_t sub128 (uint64_t a, uint64_t b) ++{ ++ uint64_t tmp = a - b; ++ uint64_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint128_t res = overflow; ++ res <<= 64; ++ res += tmp; ++ return res; ++} ++ ++uint256_t sub256 (uint128_t a, uint128_t b) ++{ ++ uint128_t tmp = a - b; ++ uint128_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint256_t res; ++ res.hi = overflow; ++ res.lo = tmp; ++ return res; ++} ++ ++/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ +diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc +index 232e903b0..55d6ee8ae 100644 +--- a/gcc/tree-ssa-math-opts.cc ++++ b/gcc/tree-ssa-math-opts.cc +@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, + } + } + ++/* Check if the corresponding operation has wider equivalent on the target. */ ++ ++static bool ++wider_optab_check_p (optab op, machine_mode mode, int unsignedp) ++{ ++ machine_mode wider_mode; ++ FOR_EACH_WIDER_MODE (wider_mode, mode) ++ { ++ machine_mode next_mode; ++ if (optab_handler (op, wider_mode) != CODE_FOR_nothing ++ || (op == smul_optab ++ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode) ++ && (find_widening_optab_handler ((unsignedp ++ ? umul_widen_optab ++ : smul_widen_optab), ++ next_mode, mode)))) ++ return true; ++ } ++ ++ return false; ++} + + /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have + a check for non-zero like: +@@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, + || code == MINUS_EXPR + || code == MULT_EXPR + || code == BIT_NOT_EXPR); ++ int unsignedp = TYPE_UNSIGNED (type); + if (!INTEGRAL_TYPE_P (type) +- || !TYPE_UNSIGNED (type) +- || has_zero_uses (lhs) +- || (code != PLUS_EXPR +- && code != MULT_EXPR +- && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, +- TYPE_MODE (type)) == CODE_FOR_nothing)) ++ || !unsignedp ++ || has_zero_uses (lhs)) + return false; + ++ if (code == PLUS_EXPR || code == MINUS_EXPR) ++ { ++ machine_mode mode = TYPE_MODE (type); ++ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab; ++ if (optab_handler (op, mode) == CODE_FOR_nothing ++ && (!flag_uaddsub_overflow_match_all ++ || !wider_optab_check_p (op, mode, unsignedp))) ++ return false; ++ } ++ + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) +@@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, + || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) + || (code == PLUS_EXPR + && optab_handler (uaddv4_optab, +- TYPE_MODE (type)) == CODE_FOR_nothing) ++ TYPE_MODE (type)) == CODE_FOR_nothing ++ && !flag_uaddsub_overflow_match_all) + || (code == MULT_EXPR + && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab, + TYPE_MODE (type)) == CODE_FOR_nothing)) +-- +2.33.0 + |