Diffstat (limited to '0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch')
-rw-r--r--  0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch  252
1 file changed, 252 insertions(+), 0 deletions(-)
diff --git a/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch b/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
new file mode 100644
index 0000000..9b2be00
--- /dev/null
+++ b/0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
@@ -0,0 +1,252 @@
+From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Tue, 24 Jan 2023 16:43:40 +0300
+Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
+ ops too.
+
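+The widened matching is gated behind the new -fuaddsub-overflow-match-all
+flag (off by default): when the current mode has no uaddv4/usubv4 handler,
+match_arith_overflow may still rewrite the open-coded carry pattern into
+.ADD_OVERFLOW/.SUB_OVERFLOW internal calls, provided a wider mode supports
+the operation (see the new wider_optab_check_p helper).
+
+A condensed form of the pattern exercised by the new gcc.dg/uaddsub.c test
+(comments added here for illustration only):
+
+  uint64_t add64 (uint32_t a, uint32_t b)
+  {
+    uint32_t tmp = a + b;        /* low half of the widened sum */
+    uint32_t overflow = 0;
+    if (tmp < a)                 /* carry out of the narrow addition */
+      overflow = 1;
+
+    uint64_t res = overflow;     /* place the carry ...             */
+    res <<= 32;                  /* ... into the upper half         */
+    res += tmp;
+    return res;
+  }
+
+With -O2 -fuaddsub-overflow-match-all the optimized dump is expected to
+contain a single .ADD_OVERFLOW (a, b) call per such function, as checked
+by the scan-tree-dump-times directives in the test.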
+---
+ gcc/common.opt | 5 ++
+ gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
+ gcc/tree-ssa-math-opts.cc | 43 ++++++++--
+ 3 files changed, 184 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index dac477c04..39c90604e 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3106,6 +3106,11 @@ freciprocal-math
+ Common Var(flag_reciprocal_math) SetByCombined Optimization
+ Same as -fassociative-math for expressions which include division.
+
++fuaddsub-overflow-match-all
++Common Var(flag_uaddsub_overflow_match_all)
++Match unsigned add/sub overflow even if the target does not support
++the corresponding instruction.
++
+ ; Nonzero means that unsafe floating-point math optimizations are allowed
+ ; for the sake of speed. IEEE compliance is not guaranteed, and operations
+ ; are allowed to assume that their arguments and results are "normal"
+diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
+new file mode 100644
+index 000000000..96c26d308
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/uaddsub.c
+@@ -0,0 +1,143 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
++#include <stdint.h>
++
++typedef unsigned __int128 uint128_t;
++typedef struct uint256_t
++{
++ uint128_t lo;
++ uint128_t hi;
++} uint256_t;
++
++uint16_t add16 (uint8_t a, uint8_t b)
++{
++ uint8_t tmp = a + b;
++ uint8_t overflow = 0;
++ if (tmp < a)
++ overflow = 1;
++
++ uint16_t res = overflow;
++ res <<= 8;
++ res += tmp;
++ return res;
++}
++
++uint32_t add32 (uint16_t a, uint16_t b)
++{
++ uint16_t tmp = a + b;
++ uint16_t overflow = 0;
++ if (tmp < a)
++ overflow = 1;
++
++ uint32_t res = overflow;
++ res <<= 16;
++ res += tmp;
++ return res;
++}
++
++uint64_t add64 (uint32_t a, uint32_t b)
++{
++ uint32_t tmp = a + b;
++ uint32_t overflow = 0;
++ if (tmp < a)
++ overflow = 1;
++
++ uint64_t res = overflow;
++ res <<= 32;
++ res += tmp;
++ return res;
++}
++
++uint128_t add128 (uint64_t a, uint64_t b)
++{
++ uint64_t tmp = a + b;
++ uint64_t overflow = 0;
++ if (tmp < a)
++ overflow = 1;
++
++ uint128_t res = overflow;
++ res <<= 64;
++ res += tmp;
++ return res;
++}
++
++uint256_t add256 (uint128_t a, uint128_t b)
++{
++ uint128_t tmp = a + b;
++ uint128_t overflow = 0;
++ if (tmp < a)
++ overflow = 1;
++
++ uint256_t res;
++ res.hi = overflow;
++ res.lo = tmp;
++ return res;
++}
++
++uint16_t sub16 (uint8_t a, uint8_t b)
++{
++ uint8_t tmp = a - b;
++ uint8_t overflow = 0;
++ if (tmp > a)
++ overflow = -1;
++
++ uint16_t res = overflow;
++ res <<= 8;
++ res += tmp;
++ return res;
++}
++
++uint32_t sub32 (uint16_t a, uint16_t b)
++{
++ uint16_t tmp = a - b;
++ uint16_t overflow = 0;
++ if (tmp > a)
++ overflow = -1;
++
++ uint32_t res = overflow;
++ res <<= 16;
++ res += tmp;
++ return res;
++}
++
++uint64_t sub64 (uint32_t a, uint32_t b)
++{
++ uint32_t tmp = a - b;
++ uint32_t overflow = 0;
++ if (tmp > a)
++ overflow = -1;
++
++ uint64_t res = overflow;
++ res <<= 32;
++ res += tmp;
++ return res;
++}
++
++uint128_t sub128 (uint64_t a, uint64_t b)
++{
++ uint64_t tmp = a - b;
++ uint64_t overflow = 0;
++ if (tmp > a)
++ overflow = -1;
++
++ uint128_t res = overflow;
++ res <<= 64;
++ res += tmp;
++ return res;
++}
++
++uint256_t sub256 (uint128_t a, uint128_t b)
++{
++ uint128_t tmp = a - b;
++ uint128_t overflow = 0;
++ if (tmp > a)
++ overflow = -1;
++
++ uint256_t res;
++ res.hi = overflow;
++ res.lo = tmp;
++ return res;
++}
++
++/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
+index 232e903b0..55d6ee8ae 100644
+--- a/gcc/tree-ssa-math-opts.cc
++++ b/gcc/tree-ssa-math-opts.cc
+@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
+ }
+ }
+
++/* Check if the corresponding operation has a wider equivalent on the target.  */
++
++static bool
++wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
++{
++ machine_mode wider_mode;
++ FOR_EACH_WIDER_MODE (wider_mode, mode)
++ {
++ machine_mode next_mode;
++ if (optab_handler (op, wider_mode) != CODE_FOR_nothing
++ || (op == smul_optab
++ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
++ && (find_widening_optab_handler ((unsignedp
++ ? umul_widen_optab
++ : smul_widen_optab),
++ next_mode, mode))))
++ return true;
++ }
++
++ return false;
++}
+
+ /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
+ a check for non-zero like:
+@@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
+ || code == MINUS_EXPR
+ || code == MULT_EXPR
+ || code == BIT_NOT_EXPR);
++ int unsignedp = TYPE_UNSIGNED (type);
+ if (!INTEGRAL_TYPE_P (type)
+- || !TYPE_UNSIGNED (type)
+- || has_zero_uses (lhs)
+- || (code != PLUS_EXPR
+- && code != MULT_EXPR
+- && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
+- TYPE_MODE (type)) == CODE_FOR_nothing))
++ || !unsignedp
++ || has_zero_uses (lhs))
+ return false;
+
++ if (code == PLUS_EXPR || code == MINUS_EXPR)
++ {
++ machine_mode mode = TYPE_MODE (type);
++ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
++ if (optab_handler (op, mode) == CODE_FOR_nothing
++ && (!flag_uaddsub_overflow_match_all
++ || !wider_optab_check_p (op, mode, unsignedp)))
++ return false;
++ }
++
+ tree rhs1 = gimple_assign_rhs1 (stmt);
+ tree rhs2 = gimple_assign_rhs2 (stmt);
+ FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
+@@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
+ || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
+ || (code == PLUS_EXPR
+ && optab_handler (uaddv4_optab,
+- TYPE_MODE (type)) == CODE_FOR_nothing)
++ TYPE_MODE (type)) == CODE_FOR_nothing
++ && !flag_uaddsub_overflow_match_all)
+ || (code == MULT_EXPR
+ && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
+ TYPE_MODE (type)) == CODE_FOR_nothing))
+--
+2.33.0
+