diff options
Diffstat (limited to '0009-Backport-expand-Simplify-removing-subregs-when-expan.patch')
-rw-r--r-- | 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch new file mode 100644 index 0000000..434c669 --- /dev/null +++ b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch @@ -0,0 +1,141 @@ +From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001 +From: zhanghaijian <z.zhanghaijian@huawei.com> +Date: Thu, 15 Jul 2021 09:04:55 +0800 +Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when + expanding a copy [PR95254] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8 + +In rtl expand, if we have a copy that matches one of the following patterns: + (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) + (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (constant C)) +where mode M1 is equal in size to M2, try to detect whether the mode change +involves an implicit round trip through memory. If so, see if we can avoid +that by removing the subregs and doing the move in mode M2 instead. + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f3341..d66fdd4e93d 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y) + gcc_assert (mode != BLKmode + && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + ++ /* If we have a copy that looks like one of the following patterns: ++ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) ++ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (constant C)) ++ where mode M1 is equal in size to M2, try to detect whether the ++ mode change involves an implicit round trip through memory. ++ If so, see if we can avoid that by removing the subregs and ++ doing the move in mode M2 instead. 
*/ ++ ++ rtx x_inner = NULL_RTX; ++ rtx y_inner = NULL_RTX; ++ ++#define CANDIDATE_SUBREG_P(subreg) \ ++ (REG_P (SUBREG_REG (subreg)) \ ++ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \ ++ GET_MODE_SIZE (GET_MODE (subreg))) \ ++ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \ ++ != CODE_FOR_nothing) ++ ++#define CANDIDATE_MEM_P(innermode, mem) \ ++ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \ ++ && !push_operand ((mem), GET_MODE (mem)) \ ++ /* Not a candidate if innermode requires too much alignment. */ \ ++ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \ ++ || targetm.slow_unaligned_access (GET_MODE (mem), \ ++ MEM_ALIGN (mem)) \ ++ || !targetm.slow_unaligned_access ((innermode), \ ++ MEM_ALIGN (mem)))) ++ ++ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x)) ++ x_inner = SUBREG_REG (x); ++ ++ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y)) ++ y_inner = SUBREG_REG (y); ++ ++ if (x_inner != NULL_RTX ++ && y_inner != NULL_RTX ++ && GET_MODE (x_inner) == GET_MODE (y_inner) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && MEM_P (y) ++ && CANDIDATE_MEM_P (GET_MODE (x_inner), y)) ++ { ++ x = x_inner; ++ y = adjust_address (y, GET_MODE (x_inner), 0); ++ mode = GET_MODE (x_inner); ++ } ++ else if (y_inner != NULL_RTX ++ && MEM_P (x) ++ && CANDIDATE_MEM_P (GET_MODE (y_inner), x)) ++ { ++ x = adjust_address (x, GET_MODE (y_inner), 0); ++ y = y_inner; ++ mode = GET_MODE (y_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && CONSTANT_P (y) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), ++ mode, ALL_REGS) ++ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ + if (CONSTANT_P (y)) + { + if (optimize +diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c 
b/gcc/testsuite/gcc.target/aarch64/pr95254.c +new file mode 100644 +index 00000000000..10bfc868197 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */ ++ ++typedef short __attribute__((vector_size (8))) v4hi; ++ ++typedef union U4HI { v4hi v; short a[4]; } u4hi; ++ ++short b[4]; ++ ++void pass_v4hi (v4hi v) ++{ ++ int i; ++ u4hi u; ++ u.v = v; ++ for (i = 0; i < 4; i++) ++ b[i] = u.a[i]; ++}; ++ ++/* { dg-final { scan-assembler-not "ptrue" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c +index 518071bdd86..398cdba5d5f 100644 +--- a/gcc/testsuite/gcc.target/i386/pr67609.c ++++ b/gcc/testsuite/gcc.target/i386/pr67609.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -msse2" } */ + /* { dg-require-effective-target lp64 } */ +-/* { dg-final { scan-assembler "movdqa" } } */ ++/* { dg-final { scan-assembler "movq\t%xmm0" } } */ + + #include <emmintrin.h> + __m128d reg; +-- +2.21.0.windows.1 + |