summary | refs | log | tree | commit | diff
path: root/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
diff options
context:
space:
mode:
Diffstat (limited to '0009-Backport-expand-Simplify-removing-subregs-when-expan.patch')
-rw-r--r-- 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch 141
1 files changed, 141 insertions, 0 deletions
diff --git a/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
new file mode 100644
index 0000000..434c669
--- /dev/null
+++ b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
@@ -0,0 +1,141 @@
+From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001
+From: zhanghaijian <z.zhanghaijian@huawei.com>
+Date: Thu, 15 Jul 2021 09:04:55 +0800
+Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when
+ expanding a copy [PR95254]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
+
+In rtl expand, if we have a copy that matches one of the following patterns:
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
+where mode M1 is equal in size to M2, try to detect whether the mode change
+involves an implicit round trip through memory. If so, see if we can avoid
+that by removing the subregs and doing the move in mode M2 instead.
+
+diff --git a/gcc/expr.c b/gcc/expr.c
+index 991b26f3341..d66fdd4e93d 100644
+--- a/gcc/expr.c
++++ b/gcc/expr.c
+@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y)
+ gcc_assert (mode != BLKmode
+ && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+
++ /* If we have a copy that looks like one of the following patterns:
++ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
++ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
++ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
++ (set (subreg:M1 (reg:M2 ...)) (constant C))
++ where mode M1 is equal in size to M2, try to detect whether the
++ mode change involves an implicit round trip through memory.
++ If so, see if we can avoid that by removing the subregs and
++ doing the move in mode M2 instead. */
++
++ rtx x_inner = NULL_RTX;
++ rtx y_inner = NULL_RTX;
++
++#define CANDIDATE_SUBREG_P(subreg) \
++ (REG_P (SUBREG_REG (subreg)) \
++ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
++ GET_MODE_SIZE (GET_MODE (subreg))) \
++ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
++ != CODE_FOR_nothing)
++
++#define CANDIDATE_MEM_P(innermode, mem) \
++ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
++ && !push_operand ((mem), GET_MODE (mem)) \
++ /* Not a candidate if innermode requires too much alignment. */ \
++ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \
++ || targetm.slow_unaligned_access (GET_MODE (mem), \
++ MEM_ALIGN (mem)) \
++ || !targetm.slow_unaligned_access ((innermode), \
++ MEM_ALIGN (mem))))
++
++ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
++ x_inner = SUBREG_REG (x);
++
++ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
++ y_inner = SUBREG_REG (y);
++
++ if (x_inner != NULL_RTX
++ && y_inner != NULL_RTX
++ && GET_MODE (x_inner) == GET_MODE (y_inner)
++ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
++ {
++ x = x_inner;
++ y = y_inner;
++ mode = GET_MODE (x_inner);
++ }
++ else if (x_inner != NULL_RTX
++ && MEM_P (y)
++ && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
++ {
++ x = x_inner;
++ y = adjust_address (y, GET_MODE (x_inner), 0);
++ mode = GET_MODE (x_inner);
++ }
++ else if (y_inner != NULL_RTX
++ && MEM_P (x)
++ && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
++ {
++ x = adjust_address (x, GET_MODE (y_inner), 0);
++ y = y_inner;
++ mode = GET_MODE (y_inner);
++ }
++ else if (x_inner != NULL_RTX
++ && CONSTANT_P (y)
++ && !targetm.can_change_mode_class (GET_MODE (x_inner),
++ mode, ALL_REGS)
++ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
++ {
++ x = x_inner;
++ y = y_inner;
++ mode = GET_MODE (x_inner);
++ }
++
+ if (CONSTANT_P (y))
+ {
+ if (optimize
+diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
+new file mode 100644
+index 00000000000..10bfc868197
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
++
++typedef short __attribute__((vector_size (8))) v4hi;
++
++typedef union U4HI { v4hi v; short a[4]; } u4hi;
++
++short b[4];
++
++void pass_v4hi (v4hi v)
++{
++ int i;
++ u4hi u;
++ u.v = v;
++ for (i = 0; i < 4; i++)
++ b[i] = u.a[i];
++};
++
++/* { dg-final { scan-assembler-not "ptrue" } } */
+diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
+index 518071bdd86..398cdba5d5f 100644
+--- a/gcc/testsuite/gcc.target/i386/pr67609.c
++++ b/gcc/testsuite/gcc.target/i386/pr67609.c
+@@ -1,7 +1,7 @@
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -msse2" } */
+ /* { dg-require-effective-target lp64 } */
+-/* { dg-final { scan-assembler "movdqa" } } */
++/* { dg-final { scan-assembler "movq\t%xmm0" } } */
+
+ #include <emmintrin.h>
+ __m128d reg;
+--
+2.21.0.windows.1
+