summaryrefslogtreecommitdiff
path: root/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
committerCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
commit73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree8e28b63e478c43c252f18b49836dff7313affe54 /0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
parent49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch')
-rw-r--r--0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch168
1 files changed, 168 insertions, 0 deletions
diff --git a/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
new file mode 100644
index 0000000..3bd87e9
--- /dev/null
+++ b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
@@ -0,0 +1,168 @@
+From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
+From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+Date: Thu, 1 Jun 2023 09:37:06 +0100
+Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
+ alternatives to 64-bit vector move patterns
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
+
+We can use the X registers to load and store 64-bit vector modes; we just need to add the alternatives
+to the mov patterns. This straightforward patch does that and for the pair variants too.
+For the testcase in the code we now generate the optimal assembly without any superfluous
+GP<->SIMD moves.
+
+Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
+ Add =r,m and =m,r alternatives.
+ (load_pair<DREG:mode><DREG2:mode>): Likewise.
+ (vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/xreg-vec-modes_1.c: New test.
+---
+ gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++--------
+ .../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++
+ 2 files changed, 65 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index 2d688edf5..b5c52ba16 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -116,26 +116,28 @@
+
+ (define_insn "*aarch64_simd_mov<VDMOV:mode>"
+ [(set (match_operand:VDMOV 0 "nonimmediate_operand"
+- "=w, m, m, w, ?r, ?w, ?r, w, w")
++ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
+ (match_operand:VDMOV 1 "general_operand"
+- "m, Dz, w, w, w, r, r, Dn, Dz"))]
++ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
+ && (register_operand (operands[0], <MODE>mode)
+ || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
+ "@
+ ldr\t%d0, %1
++ ldr\t%x0, %1
+ str\txzr, %0
+ str\t%d1, %0
++ str\t%x1, %0
+ * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
+ * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
+ fmov\t%d0, %1
+ mov\t%0, %1
+ * return aarch64_output_simd_mov_immediate (operands[1], 64);
+ fmov\t%d0, xzr"
+- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
+- neon_logic<q>, neon_to_gp<q>, f_mcr,\
++ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
++ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
+ mov_reg, neon_move<q>, f_mcr")
+- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
++ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
+ )
+
+ (define_insn "*aarch64_simd_mov<VQMOV:mode>"
+@@ -177,31 +179,35 @@
+ )
+
+ (define_insn "load_pair<DREG:mode><DREG2:mode>"
+- [(set (match_operand:DREG 0 "register_operand" "=w")
+- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+- (set (match_operand:DREG2 2 "register_operand" "=w")
+- (match_operand:DREG2 3 "memory_operand" "m"))]
++ [(set (match_operand:DREG 0 "register_operand" "=w,r")
++ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
++ (set (match_operand:DREG2 2 "register_operand" "=w,r")
++ (match_operand:DREG2 3 "memory_operand" "m,m"))]
+ "TARGET_FLOAT
+ && rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
+- "ldp\\t%d0, %d2, %z1"
+- [(set_attr "type" "neon_ldp")]
++ "@
++ ldp\t%d0, %d2, %z1
++ ldp\t%x0, %x2, %z1"
++ [(set_attr "type" "neon_ldp,load_16")]
+ )
+
+ (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+- (match_operand:DREG 1 "register_operand" "w"))
+- (set (match_operand:DREG2 2 "memory_operand" "=m")
+- (match_operand:DREG2 3 "register_operand" "w"))]
++ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
++ (match_operand:DREG 1 "register_operand" "w,r"))
++ (set (match_operand:DREG2 2 "memory_operand" "=m,m")
++ (match_operand:DREG2 3 "register_operand" "w,r"))]
+ "TARGET_FLOAT
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
+- "stp\\t%d1, %d3, %z0"
+- [(set_attr "type" "neon_stp")]
++ "@
++ stp\t%d1, %d3, %z0
++ stp\t%x1, %x3, %z0"
++ [(set_attr "type" "neon_stp,store_16")]
+ )
+
+ (define_insn "load_pair<VQ:mode><VQ2:mode>"
+diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+new file mode 100644
+index 000000000..fc4dcb1ad
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++typedef unsigned int v2si __attribute__((vector_size (8)));
++
++#define force_gp(V1) asm volatile ("" \
++ : "=r"(V1) \
++ : "r"(V1) \
++ : /* No clobbers */);
++
++/*
++** foo:
++** ldr (x[0-9]+), \[x1\]
++** str \1, \[x0\]
++** ret
++*/
++
++void
++foo (v2si *a, v2si *b)
++{
++ v2si tmp = *b;
++ force_gp (tmp);
++ *a = tmp;
++}
++
++/*
++** foo2:
++** ldp (x[0-9]+), (x[0-9]+), \[x0\]
++** stp \1, \2, \[x1\]
++** ret
++*/
++void
++foo2 (v2si *a, v2si *b)
++{
++ v2si t1 = *a;
++ v2si t2 = a[1];
++ force_gp (t1);
++ force_gp (t2);
++ *b = t1;
++ b[1] = t2;
++}
+--
+2.33.0
+