diff options
Diffstat (limited to '0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch')
-rw-r--r-- | 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch new file mode 100644 index 0000000..3bd87e9 --- /dev/null +++ b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch @@ -0,0 +1,168 @@ +From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Thu, 1 Jun 2023 09:37:06 +0100 +Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r + alternatives to 64-bit vector move patterns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b + +We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives +to the mov patterns. This straightforward patch does that and for the pair variants too. +For the testcase in the code we now generate the optimal assembly without any superfluous +GP<->SIMD moves. + +Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. + +gcc/ChangeLog: + + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Add =r,m and =m,r alternatives. + (load_pair<DREG:mode><DREG2:mode>): Likewise. + (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/xreg-vec-modes_1.c: New test. 
+--- + gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++-------- + .../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++ + 2 files changed, 65 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 2d688edf5..b5c52ba16 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -116,26 +116,28 @@ + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" + [(set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, m, m, w, ?r, ?w, ?r, w, w") ++ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") + (match_operand:VDMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn, Dz"))] ++ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))] + "TARGET_FLOAT + && (register_operand (operands[0], <MODE>mode) + || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" + "@ + ldr\t%d0, %1 ++ ldr\t%x0, %1 + str\txzr, %0 + str\t%d1, %0 ++ str\t%x1, %0 + * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; + * return TARGET_SIMD ? 
\"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\"; + fmov\t%d0, %1 + mov\t%0, %1 + * return aarch64_output_simd_mov_immediate (operands[1], 64); + fmov\t%d0, xzr" +- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ +- neon_logic<q>, neon_to_gp<q>, f_mcr,\ ++ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\ ++ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\ + mov_reg, neon_move<q>, f_mcr") +- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")] ++ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")] + ) + + (define_insn "*aarch64_simd_mov<VQMOV:mode>" +@@ -177,31 +179,35 @@ + ) + + (define_insn "load_pair<DREG:mode><DREG2:mode>" +- [(set (match_operand:DREG 0 "register_operand" "=w") +- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) +- (set (match_operand:DREG2 2 "register_operand" "=w") +- (match_operand:DREG2 3 "memory_operand" "m"))] ++ [(set (match_operand:DREG 0 "register_operand" "=w,r") ++ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump")) ++ (set (match_operand:DREG2 2 "register_operand" "=w,r") ++ (match_operand:DREG2 3 "memory_operand" "m,m"))] + "TARGET_FLOAT + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<DREG:MODE>mode)))" +- "ldp\\t%d0, %d2, %z1" +- [(set_attr "type" "neon_ldp")] ++ "@ ++ ldp\t%d0, %d2, %z1 ++ ldp\t%x0, %x2, %z1" ++ [(set_attr "type" "neon_ldp,load_16")] + ) + + (define_insn "vec_store_pair<DREG:mode><DREG2:mode>" +- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") +- (match_operand:DREG 1 "register_operand" "w")) +- (set (match_operand:DREG2 2 "memory_operand" "=m") +- (match_operand:DREG2 3 "register_operand" "w"))] ++ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump") ++ (match_operand:DREG 1 "register_operand" "w,r")) ++ (set (match_operand:DREG2 2 "memory_operand" "=m,m") ++ (match_operand:DREG2 3 "register_operand" "w,r"))] + "TARGET_FLOAT + && rtx_equal_p (XEXP (operands[2], 0), + plus_constant 
(Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<DREG:MODE>mode)))" +- "stp\\t%d1, %d3, %z0" +- [(set_attr "type" "neon_stp")] ++ "@ ++ stp\t%d1, %d3, %z0 ++ stp\t%x1, %x3, %z0" ++ [(set_attr "type" "neon_stp,store_16")] + ) + + (define_insn "load_pair<VQ:mode><VQ2:mode>" +diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c +new file mode 100644 +index 000000000..fc4dcb1ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++typedef unsigned int v2si __attribute__((vector_size (8))); ++ ++#define force_gp(V1) asm volatile ("" \ ++ : "=r"(V1) \ ++ : "r"(V1) \ ++ : /* No clobbers */); ++ ++/* ++** foo: ++** ldr (x[0-9]+), \[x1\] ++** str \1, \[x0\] ++** ret ++*/ ++ ++void ++foo (v2si *a, v2si *b) ++{ ++ v2si tmp = *b; ++ force_gp (tmp); ++ *a = tmp; ++} ++ ++/* ++** foo2: ++** ldp (x[0-9]+), (x[0-9]+), \[x0\] ++** stp \1, \2, \[x1\] ++** ret ++*/ ++void ++foo2 (v2si *a, v2si *b) ++{ ++ v2si t1 = *a; ++ v2si t2 = a[1]; ++ force_gp (t1); ++ force_gp (t2); ++ *b = t1; ++ b[1] = t2; ++} +-- +2.33.0 + |