| field | value | date |
|---|---|---|
| author | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000 |
| committer | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000 |
| commit | 73127104a245052cd5cf29cdaaca3e5c32c70348 (patch) | |
| tree | 8e28b63e478c43c252f18b49836dff7313affe54 /0044-Port-maxmin-patch-to-GCC-12.patch | |
| parent | 49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff) | |
automatic import of gcc (openeuler24.03_LTS_SP1)
Diffstat (limited to '0044-Port-maxmin-patch-to-GCC-12.patch')
| -rw-r--r-- | 0044-Port-maxmin-patch-to-GCC-12.patch | 378 |

1 file changed, 378 insertions(+), 0 deletions(-)
diff --git a/0044-Port-maxmin-patch-to-GCC-12.patch b/0044-Port-maxmin-patch-to-GCC-12.patch
new file mode 100644
index 0000000..2423c12
--- /dev/null
+++ b/0044-Port-maxmin-patch-to-GCC-12.patch
@@ -0,0 +1,378 @@
+From a3013c074cd2ab5f71eb98a587a627f38c68656c Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Feb 2024 17:07:24 +0800
+Subject: [PATCH 12/18] Port maxmin patch to GCC 12
+
+---
+ gcc/config/aarch64/aarch64-simd.md    | 256 ++++++++++++++++++++++++++
+ gcc/config/aarch64/predicates.md      |  19 ++
+ gcc/testsuite/gcc.dg/combine-maxmin.c |  46 +++++
+ 3 files changed, 321 insertions(+)
+ create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index 82f73805f..de92802f5 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -1138,6 +1138,82 @@
+   [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
+ )
+ 
++;; Simplify the extension with following truncation for shift+neg operation.
++
++(define_insn_and_split "*aarch64_sshr_neg_v8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (match_operand:V8HI 1 "register_operand")
++		    (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
++	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (match_dup 1)
++		    (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
++	      (match_dup 2)))))]
++  "TARGET_SIMD"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(ashiftrt:V8HI
++	  (neg:V8HI
++	    (match_operand:V8HI 1 "register_operand" "w"))
++	  (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
++  {
++    /* Reduce the shift amount to smaller mode.  */
++    int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
++	      - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
++    operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
++  }
++  [(set_attr "type" "multiple")]
++)
++
++;; The helper definition that allows combiner to use the previous pattern.
++
++(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(match_operand:V4SI 1 "register_operand" "w"))
++	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(match_operand:V4SI 3 "register_operand" "w"))
++	      (match_dup 2)))))]
++  "TARGET_SIMD"
++  "#"
++  "&& true"
++  [(set (match_operand:V4SI 1 "register_operand" "=w")
++	(ashiftrt:V4SI
++	  (neg:V4SI
++	    (match_dup 1))
++	  (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++   (set (match_operand:V4SI 3 "register_operand" "=w")
++	(ashiftrt:V4SI
++	  (neg:V4SI
++	    (match_dup 3))
++	  (match_dup 2)))
++   (set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (match_dup 1))
++	  (truncate:V4HI
++	    (match_dup 3))))]
++  ""
++  [(set_attr "type" "multiple")]
++)
++
+ (define_insn "*aarch64_simd_sra<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+ 	(plus:VDQ_I
+@@ -1714,6 +1790,26 @@
+  }
+ )
+ 
++(define_insn "vec_pack_trunc_shifted_<mode>"
++ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
++       (vec_concat:<VNARROWQ2>
++	 (truncate:<VNARROWQ>
++	   (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
++	      (match_operand:VQN 2 "half_size_operand" "w")))
++	 (truncate:<VNARROWQ>
++	   (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
++	      (match_operand:VQN 4 "half_size_operand" "w")))))]
++ "TARGET_SIMD"
++ {
++   if (BYTES_BIG_ENDIAN)
++     return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
++   else
++     return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
++ }
++  [(set_attr "type" "neon_permute<q>")
++   (set_attr "length" "4")]
++)
++
+ (define_insn "aarch64_shrn<mode>_insn_le"
+   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ 	(vec_concat:<VNARROWQ2>
+@@ -6652,6 +6748,166 @@
+   [(set_attr "type" "neon_tst<q>")]
+ )
+ 
++;; Simplify the extension with following truncation for cmtst-like operation.
++
++(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (plus:V4HI
++	    (truncate:V4HI
++	      (eq:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (and:V8HI
++		      (match_operand:V8HI 1 "register_operand")
++		      (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++		    (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
++		(match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
++	    (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
++	  (plus:V4HI
++	    (truncate:V4HI
++	      (eq:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (and:V8HI
++		      (match_dup 1)
++		      (match_dup 2))
++		    (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
++		(match_dup 4)))
++	    (match_dup 5))))]
++  "TARGET_SIMD && !reload_completed"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 6 "register_operand" "=w")
++	(match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++   (set (match_operand:V8HI 0 "register_operand" "=w")
++	(plus:V8HI
++	  (eq:V8HI
++	    (and:V8HI
++	      (match_operand:V8HI 1 "register_operand" "w")
++	      (match_dup 6))
++	    (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
++	  (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
++  {
++    if (can_create_pseudo_p ())
++      {
++	int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
++	operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
++	int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
++	operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
++
++	operands[6] = gen_reg_rtx (V8HImode);
++      }
++    else
++      FAIL;
++  }
++  [(set_attr "type" "neon_tst_q")]
++)
++
++;; Three helper definitions that allow combiner to use the previous pattern.
++
++(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
++  [(set (match_operand:V4SI 0 "register_operand" "=w")
++	(neg:V4SI
++	  (eq:V4SI
++	    (sign_extend:V4SI
++	      (vec_select:V4HI
++		(and:V8HI
++		  (match_operand:V8HI 1 "register_operand")
++		  (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++		(match_operand:V8HI 3 "vect_par_cnst_lo_half")))
++	    (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
++  "TARGET_SIMD && !reload_completed"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 5 "register_operand" "=w")
++	(and:V8HI
++	  (match_operand:V8HI 1 "register_operand")
++	  (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
++   (set (match_operand:V4SI 0 "register_operand" "=w")
++	(sign_extend:V4SI
++	  (vec_select:V4HI
++	    (match_dup 5)
++	    (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
++   (set (match_dup 0)
++	(neg:V4SI
++	  (eq:V4SI
++	    (match_dup 0)
++	    (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
++  {
++    if (can_create_pseudo_p ())
++      operands[5] = gen_reg_rtx (V8HImode);
++    else
++      FAIL;
++  }
++  [(set_attr "type" "multiple")]
++)
++
++(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
++  [(set (match_operand:V4SI 0 "register_operand" "=w")
++	  (neg:V4SI
++	    (eq:V4SI
++	      (sign_extend:V4SI
++		(vec_select:V4HI
++		  (and:V8HI
++		    (match_operand:V8HI 1 "register_operand")
++		    (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++		  (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
++	      (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
++  "TARGET_SIMD && !reload_completed"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 5 "register_operand" "=w")
++	(and:V8HI
++	  (match_operand:V8HI 1 "register_operand")
++	  (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
++   (set (match_operand:V4SI 0 "register_operand" "=w")
++	(sign_extend:V4SI
++	  (vec_select:V4HI
++	    (match_dup 5)
++	    (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
++   (set (match_dup 0)
++	  (neg:V4SI
++	    (eq:V4SI
++	      (match_dup 0)
++	      (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
++  {
++    if (can_create_pseudo_p ())
++      operands[5] = gen_reg_rtx (V8HImode);
++    else
++      FAIL;
++  }
++  [(set_attr "type" "multiple")]
++)
++
++(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (not:V4SI
++	      (match_operand:V4SI 1 "register_operand" "w")))
++	  (truncate:V4HI
++	    (not:V4SI
++	      (match_operand:V4SI 2 "register_operand" "w")))))]
++  "TARGET_SIMD"
++  "#"
++  "&& true"
++  [(set (match_operand:V4SI 1 "register_operand" "=w")
++	(not:V4SI
++	  (match_dup 1)))
++   (set (match_operand:V4SI 2 "register_operand" "=w")
++	(not:V4SI
++	  (match_dup 2)))
++   (set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (match_dup 1))
++	  (truncate:V4HI
++	    (match_dup 2))))]
++  ""
++  [(set_attr "type" "multiple")]
++)
++
+ (define_insn_and_split "aarch64_cmtstdi"
+   [(set (match_operand:DI 0 "register_operand" "=w,r")
+ 	(neg:DI
+diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
+index 07c14aacb..1b8496c07 100644
+--- a/gcc/config/aarch64/predicates.md
++++ b/gcc/config/aarch64/predicates.md
+@@ -118,6 +118,25 @@
+ 	     (match_test "aarch64_simd_valid_immediate (op, NULL,
+ 							AARCH64_CHECK_ORR)"))))
+ 
++(define_predicate "aarch64_bic_imm_for_maxmin"
++   (match_code "const_vector")
++{
++  if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
++    return false;
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
++  return CONST_INT_P (op)
++	 && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
++})
++
++(define_predicate "maxmin_arith_shift_operand"
++   (match_code "const_vector")
++{
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
++  return CONST_INT_P (op) && (UINTVAL (op) == size);
++})
++
+ (define_predicate "aarch64_reg_or_bic_imm"
+    (ior (match_operand 0 "register_operand")
+ 	(and (match_code "const_vector")
+diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
+new file mode 100755
+index 000000000..06bce7029
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
+@@ -0,0 +1,46 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-options "-O3 -fdump-rtl-combine-all" } */
++
++/* The test checks usage of smax/smin insns for clip evaluation and
++ * uzp1/uzp2 insns for vector element narrowing.  It's inspired by
++ * sources of x264 codec.  */
++
++typedef unsigned char uint8_t;
++typedef long int intptr_t;
++typedef signed short int int16_t;
++
++static __attribute__((always_inline)) inline uint8_t clip (int x )
++{
++    return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
++}
++
++void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
++	 intptr_t stride, int width, int height, int16_t *buf)
++{
++    const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
++    for( int y = 0; y < height; y++ ) {
++        for( int x = -2; x < width+3; x++ ) {
++            int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
++		     + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
++            dstv[x] = clip ( (v + 16) >> 5 );
++            buf[x+2] = v + pad;
++        }
++        for( int x = 0; x < width; x++ )
++            dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
++			      + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
++			     - 32*pad + 512) >> 10);
++        for( int x = 0; x < width; x++ )
++            dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
++			      + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
++			     + 16) >> 5);
++        dsth += stride;
++        dstv += stride;
++        dstc += stride;
++        src += stride;
++    }
++}
++
++/* { dg-final { scan-assembler-times {smax\t} 4 } }  */
++/* { dg-final { scan-assembler-times {smin\t} 4 } }  */
++/* { dg-final { scan-assembler-times {cmtst\t} 2 } }  */
++/* { dg-final { scan-assembler-times {uzp1\t} 6 } }  */
+-- 
+2.33.0
+
