diff options
Diffstat (limited to '0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch')
-rw-r--r-- | 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch | 1552 |
1 files changed, 1552 insertions, 0 deletions
diff --git a/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch b/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch new file mode 100644 index 0000000..f99b246 --- /dev/null +++ b/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch @@ -0,0 +1,1552 @@ +From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: [PATCH 080/157] [Backport][SME] aarch64: Distinguish + streaming-compatible AdvSIMD insns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac + +The vast majority of Advanced SIMD instructions are not +available in streaming mode, but some of the load/store/move +instructions are. This patch adds a new target feature macro +called TARGET_BASE_SIMD for this streaming-compatible subset. + +The vector-to-vector move instructions are not streaming-compatible, +so we need to use the SVE move instructions where enabled, or fall +back to the nofp16 handling otherwise. + +I haven't found a good way of testing the SVE EXT alternative +in aarch64_simd_mov_from_<mode>high, but I'd rather provide it +than not. + +gcc/ + * config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro. + (TARGET_SIMD): Require PSTATE.SM to be 0. + (AARCH64_ISA_SM_OFF): New macro. + * config/aarch64/aarch64.cc (aarch64_array_mode_supported_p): + Allow Advanced SIMD structure modes for TARGET_BASE_SIMD. + (aarch64_print_operand): Support '%Z'. + (aarch64_secondary_reload): Expect SVE moves to be used for + Advanced SIMD modes if SVE is enabled and non-streaming + Advanced SIMD isn't. + (aarch64_register_move_cost): Likewise. + (aarch64_simd_container_mode): Extend Advanced SIMD mode + handling to TARGET_BASE_SIMD. + (aarch64_expand_cpymem): Expand commentary. + * config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd. + (arch_enabled): Handle it. + (*mov<mode>_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD. + (*movti_aarch64): Use an SVE move instruction if non-streaming + SIMD isn't available. + (*mov<TFD:mode>_aarch64): Likewise. + (load_pair_dw_tftf): Extend to TARGET_BASE_SIMD. + (store_pair_dw_tftf): Likewise. + (loadwb_pair<TX:mode>_<P:mode>): Likewise. + (storewb_pair<TX:mode>_<P:mode>): Likewise. + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Allow UMOV in streaming mode. + (*aarch64_simd_mov<VQMOV:mode>): Use an SVE move instruction + if non-streaming SIMD isn't available. + (aarch64_store_lane0<mode>): Depend on TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_simd_mov_from_<mode>low): Likewise. Use fmov if + Advanced SIMD is completely disabled. + (aarch64_simd_mov_from_<mode>high): Use SVE EXT instructions if + non-streaming SIMD isn't available. + +gcc/testsuite/ + * gcc.target/aarch64/movdf_2.c: New test. + * gcc.target/aarch64/movdi_3.c: Likewise. + * gcc.target/aarch64/movhf_2.c: Likewise. + * gcc.target/aarch64/movhi_2.c: Likewise. + * gcc.target/aarch64/movqi_2.c: Likewise. + * gcc.target/aarch64/movsf_2.c: Likewise. + * gcc.target/aarch64/movsi_2.c: Likewise. + * gcc.target/aarch64/movtf_3.c: Likewise. + * gcc.target/aarch64/movtf_4.c: Likewise. + * gcc.target/aarch64/movti_3.c: Likewise. + * gcc.target/aarch64/movti_4.c: Likewise. + * gcc.target/aarch64/movv16qi_4.c: Likewise. + * gcc.target/aarch64/movv16qi_5.c: Likewise. + * gcc.target/aarch64/movv8qi_4.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_1.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_2.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_3.c: Likewise. +--- + gcc/config/aarch64/aarch64-simd.md | 50 ++++++----- + gcc/config/aarch64/aarch64.cc | 16 ++-- + gcc/config/aarch64/aarch64.h | 12 ++- + gcc/config/aarch64/aarch64.md | 77 +++++++++-------- + gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++ + .../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++ + .../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++ + .../gcc.target/aarch64/sme/arm_neon_3.c | 11 +++ + 21 files changed, 1062 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 1f4b30642..62493cdfa 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -121,19 +121,19 @@ + && (register_operand (operands[0], <MODE>mode) + || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" + {@ [cons: =0, 1; attrs: type, arch] +- [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1 +- [r , m ; load_8 , * ] ldr\t%x0, %1 +- [m , Dz; store_8 , * ] str\txzr, %0 +- [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0 +- [m , r ; store_8 , * ] str\t%x1, %0 +- [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype> +- [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1 +- [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0] +- [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1 +- [?w, r ; f_mcr , * ] fmov\t%d0, %1 +- [?r, r ; mov_reg , * ] mov\t%0, %1 +- [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64); +- [w , Dz; f_mcr , * ] fmov\t%d0, xzr ++ [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1 ++ [r , m ; load_8 , * ] ldr\t%x0, %1 ++ [m , Dz; store_8 , * ] str\txzr, %0 ++ [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0 ++ [m , r ; store_8 , * ] str\t%x1, %0 ++ [w , w ; neon_logic<q> , simd ] mov\t%0.<Vbtype>, %1.<Vbtype> ++ [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1 ++ [?r, w ; neon_to_gp<q> , base_simd] umov\t%0, %1.d[0] ++ [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1 ++ [?w, r ; f_mcr , * ] fmov\t%d0, %1 ++ [?r, r ; mov_reg , * ] mov\t%0, %1 ++ [w , Dn; neon_move<q> , simd ] << aarch64_output_simd_mov_immediate (operands[1], 64); ++ [w , Dz; f_mcr , * ] fmov\t%d0, xzr + } + ) + +@@ -148,6 +148,7 @@ + [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0 + [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0 + [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype> ++ [w , w ; * , sve , 4] mov\t%Z0.d, %Z1.d + [?r , w ; multiple , * , 8] # + [?w , r ; multiple , * , 8] # + [?r , r ; multiple , * , 8] # +@@ -177,7 +178,7 @@ + [(set (match_operand:<VEL> 0 "memory_operand" "=m") + (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") + (parallel [(match_operand 2 "const_int_operand" "n")])))] +- "TARGET_SIMD ++ "TARGET_FLOAT + && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0" + "str\\t%<Vetype>1, %0" + [(set_attr "type" "neon_store1_1reg<q>")] +@@ -312,35 +313,38 @@ + ) + + (define_insn_and_split "aarch64_simd_mov_from_<mode>low" +- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r") ++ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))] +- "TARGET_SIMD" ++ "TARGET_FLOAT" + "@ + # +- umov\t%0, %1.d[0]" ++ umov\t%0, %1.d[0] ++ fmov\t%0, %d1" + "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode); + } +- [(set_attr "type" "mov_reg,neon_to_gp<q>") ++ [(set_attr "type" "mov_reg,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,base_simd,*") + (set_attr "length" "4")] + ) + + (define_insn "aarch64_simd_mov_from_<mode>high" +- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") ++ [(set (match_operand:<VHALF> 0 "register_operand" "=w,w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))] + "TARGET_FLOAT" + "@ + dup\t%d0, %1.d[1] ++ ext\t%Z0.b, %Z0.b, %Z0.b, #8 + umov\t%0, %1.d[1] + fmov\t%0, %1.d[1]" +- [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc") +- (set_attr "arch" "simd,simd,*") ++ [(set_attr "type" "neon_dup<q>,*,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,sve,simd,*") + (set_attr "length" "4")] + ) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8f8395201..08a98f8ba 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -3999,7 +3999,7 @@ static bool + aarch64_array_mode_supported_p (machine_mode mode, + unsigned HOST_WIDE_INT nelems) + { +- if (TARGET_SIMD ++ if (TARGET_BASE_SIMD + && (AARCH64_VALID_SIMD_QREG_MODE (mode) + || AARCH64_VALID_SIMD_DREG_MODE (mode)) + && (nelems >= 2 && nelems <= 4)) +@@ -12955,8 +12955,8 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- /* Without the TARGET_SIMD instructions we cannot move a Q register +- to a Q register directly. We need a scratch. */ ++ /* Without the TARGET_SIMD or TARGET_SVE instructions we cannot move a ++ Q register to a Q register directly. We need a scratch. */ + if (REG_P (x) + && (mode == TFmode + || mode == TImode +@@ -15540,7 +15540,7 @@ aarch64_register_move_cost (machine_mode mode, + secondary reload. A general register is used as a scratch to move + the upper DI value and the lower DI value is moved directly, + hence the cost is the sum of three moves. */ +- if (! TARGET_SIMD) ++ if (!TARGET_SIMD && !TARGET_SVE) + return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; + + return regmove_cost->FP2FP; +@@ -21107,7 +21107,7 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) + return aarch64_full_sve_mode (mode).else_mode (word_mode); + + gcc_assert (known_eq (width, 64) || known_eq (width, 128)); +- if (TARGET_SIMD) ++ if (TARGET_BASE_SIMD) + { + if (known_eq (width, 128)) + return aarch64_vq_mode (mode).else_mode (word_mode); +@@ -25221,7 +25221,11 @@ aarch64_expand_cpymem (rtx *operands) + int copy_bits = 256; + + /* Default to 256-bit LDP/STP on large copies, however small copies, no SIMD +- support or slow 256-bit LDP/STP fall back to 128-bit chunks. */ ++ support or slow 256-bit LDP/STP fall back to 128-bit chunks. ++ ++ ??? Although it would be possible to use LDP/STP Qn in streaming mode ++ (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear ++ whether that would improve performance. */ + if (size <= 24 + || !TARGET_SIMD + || (aarch64_tune_params.extra_tuning_flags +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index dd2de4e88..a3c83a3b1 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -61,8 +61,15 @@ + #define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN) + + /* AdvSIMD is supported in the default configuration, unless disabled by +- -mgeneral-regs-only or by the +nosimd extension. */ +-#define TARGET_SIMD (AARCH64_ISA_SIMD) ++ -mgeneral-regs-only or by the +nosimd extension. The set of available ++ instructions is then subdivided into: ++ ++ - the "base" set, available both in SME streaming mode and in ++ non-streaming mode ++ ++ - the full set, available only in non-streaming mode. */ ++#define TARGET_BASE_SIMD (AARCH64_ISA_SIMD) ++#define TARGET_SIMD (AARCH64_ISA_SIMD && AARCH64_ISA_SM_OFF) + #define TARGET_FLOAT (AARCH64_ISA_FP) + + #define UNITS_PER_WORD 8 +@@ -199,6 +206,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + + /* Macros to test ISA flags. */ + ++#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) + #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) + #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 1ec23fae8..079c8a3f9 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,7 +378,8 @@ + ;; As a convenience, "fp_q" means "fp" + the ability to move between + ;; Q registers and is equivalent to "simd". + +-(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16]) ++(define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd ++ simd nosimd sve fp16]) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -406,6 +407,12 @@ + (and (eq_attr "arch" "fp") + (match_test "TARGET_FLOAT")) + ++ (and (eq_attr "arch" "base_simd") ++ (match_test "TARGET_BASE_SIMD")) ++ ++ (and (eq_attr "arch" "nobase_simd") ++ (match_test "!TARGET_BASE_SIMD")) ++ + (and (eq_attr "arch" "fp_q, simd") + (match_test "TARGET_SIMD")) + +@@ -1202,22 +1209,22 @@ + "(register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_zero (operands[1], <MODE>mode))" + {@ [cons: =0, 1; attrs: type, arch] +- [r, r ; mov_reg , * ] mov\t%w0, %w1 +- [r, M ; mov_imm , * ] mov\t%w0, %1 +- [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode); ++ [r, r ; mov_reg , * ] mov\t%w0, %w1 ++ [r, M ; mov_imm , * ] mov\t%w0, %1 ++ [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode); + /* The "mov_imm" type for CNT is just a placeholder. */ +- [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); +- [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]); +- [r, m ; load_4 , * ] ldr<size>\t%w0, %1 +- [w, m ; load_4 , * ] ldr\t%<size>0, %1 +- [m, r Z ; store_4 , * ] str<size>\\t%w1, %0 +- [m, w ; store_4 , * ] str\t%<size>1, %0 +- [r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0] +- [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 +- [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1 +- [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1 +- [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] +- [w, w ; neon_dup , nosimd] fmov\t%s0, %s1 ++ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); ++ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]); ++ [r, m ; load_4 , * ] ldr<size>\t%w0, %1 ++ [w, m ; load_4 , * ] ldr\t%<size>0, %1 ++ [m, r Z ; store_4 , * ] str<size>\\t%w1, %0 ++ [m, w ; store_4 , * ] str\t%<size>1, %0 ++ [r, w ; neon_to_gp<q> , base_simd ] umov\t%w0, %1.<v>[0] ++ [r, w ; neon_to_gp<q> , nobase_simd] fmov\t%w0, %s1 ++ [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1 ++ [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 ++ [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] ++ [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 + } + ) + +@@ -1372,9 +1379,9 @@ + + (define_insn "*movti_aarch64" + [(set (match_operand:TI 0 +- "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") ++ "nonimmediate_operand" "= r,w,w,w, r,w,w,r,m,m,w,m") + (match_operand:TI 1 +- "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))] ++ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,w,m,r,Z,m,w"))] + "(register_operand (operands[0], TImode) + || aarch64_reg_or_zero (operands[1], TImode))" + "@ +@@ -1384,16 +1391,17 @@ + # + # + mov\\t%0.16b, %1.16b ++ mov\\t%Z0.d, %Z1.d + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" +- [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ ++ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q,*,\ + load_16,store_16,store_16,\ + load_16,store_16") +- (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") +- (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")] ++ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4,4") ++ (set_attr "arch" "*,simd,*,*,*,simd,sve,*,*,*,fp,fp")] + ) + + ;; Split a TImode register-register or register-immediate move into +@@ -1529,13 +1537,14 @@ + + (define_insn "*mov<mode>_aarch64" + [(set (match_operand:TFD 0 +- "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") ++ "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m") + (match_operand:TFD 1 +- "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] ++ "general_operand" " w,w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "@ + mov\\t%0.16b, %1.16b ++ mov\\t%Z0.d, %Z1.d + # + # + # +@@ -1546,10 +1555,10 @@ + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0" +- [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ ++ [(set_attr "type" "logic_reg,*,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ + f_loadd,f_stored,load_16,store_16,store_16") +- (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") +- (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")] ++ (set_attr "length" "4,4,8,8,8,4,4,4,4,4,4,4") ++ (set_attr "arch" "simd,sve,*,*,*,simd,*,*,*,*,*,*")] + ) + + (define_split +@@ -1738,7 +1747,7 @@ + (match_operand:TF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:TF 2 "register_operand" "=w") + (match_operand:TF 3 "memory_operand" "m"))] +- "TARGET_SIMD ++ "TARGET_BASE_SIMD + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), +@@ -1788,11 +1797,11 @@ + (match_operand:TF 1 "register_operand" "w")) + (set (match_operand:TF 2 "memory_operand" "=m") + (match_operand:TF 3 "register_operand" "w"))] +- "TARGET_SIMD && +- rtx_equal_p (XEXP (operands[2], 0), +- plus_constant (Pmode, +- XEXP (operands[0], 0), +- GET_MODE_SIZE (TFmode)))" ++ "TARGET_BASE_SIMD ++ && rtx_equal_p (XEXP (operands[2], 0), ++ plus_constant (Pmode, ++ XEXP (operands[0], 0), ++ GET_MODE_SIZE (TFmode)))" + "stp\\t%q1, %q3, %z0" + [(set_attr "type" "neon_stp_q") + (set_attr "fp" "yes")] +@@ -1840,7 +1849,7 @@ + (set (match_operand:TX 3 "register_operand" "=w") + (mem:TX (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] +- "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)" ++ "TARGET_BASE_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)" + "ldp\\t%q2, %q3, [%1], %4" + [(set_attr "type" "neon_ldp_q")] + ) +@@ -1890,7 +1899,7 @@ + (set (mem:TX (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:TX 3 "register_operand" "w"))])] +- "TARGET_SIMD ++ "TARGET_BASE_SIMD + && INTVAL (operands[5]) + == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)" + "stp\\t%q2, %q3, [%0, %4]!" +diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_2.c b/gcc/testsuite/gcc.target/aarch64/movdf_2.c +new file mode 100644 +index 000000000..0d459d317 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdf_2.c +@@ -0,0 +1,51 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++double ++fpr_to_fpr (double q0, double q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++double ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++double ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++void ++fpr_to_gpr (double q0) [[arm::streaming_compatible]] ++{ ++ register double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_3.c b/gcc/testsuite/gcc.target/aarch64/movdi_3.c +new file mode 100644 +index 000000000..31b2cbbae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdi_3.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register uint64_t q0 asm ("q0"); ++ register uint64_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++void ++gpr_to_fpr (uint64_t x0) [[arm::streaming_compatible]] ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++uint64_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register uint64_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_2.c b/gcc/testsuite/gcc.target/aarch64/movhf_2.c +new file mode 100644 +index 000000000..3292b0de8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhf_2.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+simd" ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++_Float16 ++fpr_to_fpr (_Float16 q0, _Float16 q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++_Float16 ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register _Float16 w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++_Float16 ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (_Float16 q0) [[arm::streaming_compatible]] ++{ ++ register _Float16 w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_2.c b/gcc/testsuite/gcc.target/aarch64/movhi_2.c +new file mode 100644 +index 000000000..dbbf3486f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhi_2.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+simd" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register uint16_t q0 asm ("q0"); ++ register uint16_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint16_t w0) [[arm::streaming_compatible]] ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** umov w0, v0.h\[0\] ++** ret ++*/ ++uint16_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register uint16_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_2.c b/gcc/testsuite/gcc.target/aarch64/movqi_2.c +new file mode 100644 +index 000000000..aec087e4e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movqi_2.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register uint8_t q0 asm ("q0"); ++ register uint8_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint8_t w0) [[arm::streaming_compatible]] ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** umov w0, v0.b\[0\] ++** ret ++*/ ++uint8_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register uint8_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_2.c b/gcc/testsuite/gcc.target/aarch64/movsf_2.c +new file mode 100644 +index 000000000..7fed4b22f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsf_2.c +@@ -0,0 +1,51 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++float ++fpr_to_fpr (float q0, float q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++float ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register float w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++float ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (float q0) [[arm::streaming_compatible]] ++{ ++ register float w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_2.c b/gcc/testsuite/gcc.target/aarch64/movsi_2.c +new file mode 100644 +index 000000000..c14d2468a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsi_2.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register uint32_t q0 asm ("q0"); ++ register uint32_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint32_t w0) [[arm::streaming_compatible]] ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++uint32_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register uint32_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_3.c b/gcc/testsuite/gcc.target/aarch64/movtf_3.c +new file mode 100644 +index 000000000..dd164a418 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movtf_3.c +@@ -0,0 +1,81 @@ ++/* { dg-do assemble } */ ++/* { dg-require-effective-target large_long_double } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nosve" ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, #?16 ++** ret ++*/ ++long double ++fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++long double ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register long double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++long double ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** fmov x1, d0 ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (long double q0) [[arm::streaming_compatible]] ++{ ++ register long double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_4.c b/gcc/testsuite/gcc.target/aarch64/movtf_4.c +new file mode 100644 +index 000000000..faf9703e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movtf_4.c +@@ -0,0 +1,78 @@ ++/* { dg-do assemble } */ ++/* { dg-require-effective-target large_long_double } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+sve" ++ ++/* ++** fpr_to_fpr: ++** mov z0.d, z1.d ++** ret ++*/ ++long double ++fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++long double ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register long double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++long double ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** fmov x1, d0 ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (long double q0) [[arm::streaming_compatible]] ++{ ++ register long double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movti_3.c b/gcc/testsuite/gcc.target/aarch64/movti_3.c +new file mode 100644 +index 000000000..243109181 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movti_3.c +@@ -0,0 +1,86 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nosve" ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, #?16 ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ register __int128_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++void ++gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** fmov x1, d0 ++** ) ++** ret ++*/ ++__int128_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movti_4.c b/gcc/testsuite/gcc.target/aarch64/movti_4.c +new file mode 100644 +index 000000000..a70feccb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movti_4.c +@@ -0,0 +1,83 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+sve" ++ ++/* ++** fpr_to_fpr: ++** mov z0\.d, z1\.d ++** ret ++*/ ++void ++fpr_to_fpr (void) [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ register __int128_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++void ++gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** fmov x1, d0 ++** ) ++** ret ++*/ ++__int128_t ++fpr_to_gpr () [[arm::streaming_compatible]] ++{ ++ register __int128_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c +new file mode 100644 +index 000000000..7bec888b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c +@@ -0,0 +1,82 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nosve" ++ ++typedef unsigned char v16qi __attribute__((vector_size(16))); ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, #?16 ++** ret ++*/ ++v16qi ++fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++v16qi ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register v16qi x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++v16qi ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return (v16qi) {}; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** umov x0, v0.d\[0\] ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** umov x0, v0.d\[0\] ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** umov x1, v0.d\[0\] ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** umov x1, v0.d\[0\] ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]] ++{ ++ register v16qi x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c +new file mode 100644 +index 000000000..2d36342b3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c +@@ -0,0 +1,79 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+sve" ++ ++typedef unsigned char v16qi __attribute__((vector_size(16))); ++ ++/* ++** fpr_to_fpr: ++** mov z0.d, z1.d ++** ret ++*/ ++v16qi ++fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\[1\], x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\[1\], x0 ++** ret ++*/ ++v16qi ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register v16qi x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++v16qi ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return (v16qi) {}; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** umov x0, v0.d\[0\] ++** fmov x1, v0.d\[1\] ++** | ++** fmov x1, v0.d\[1\] ++** umov x0, v0.d\[0\] ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** umov x1, v0.d\[0\] ++** fmov x0, v0.d\[1\] ++** | ++** fmov x0, v0.d\[1\] ++** umov x1, v0.d\[0\] ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]] ++{ ++ register v16qi x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c +new file mode 100644 +index 000000000..12ae25a3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c +@@ -0,0 +1,55 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nosve" ++ ++typedef unsigned char v8qi __attribute__((vector_size(8))); ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++v8qi ++fpr_to_fpr (v8qi q0, v8qi q1) [[arm::streaming_compatible]] ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++v8qi ++gpr_to_fpr () [[arm::streaming_compatible]] ++{ ++ register v8qi x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++v8qi ++zero_to_fpr () [[arm::streaming_compatible]] ++{ ++ return (v8qi) {}; ++} ++ ++/* ++** fpr_to_gpr: ++** umov x0, v0\.d\[0\] ++** ret ++*/ ++void ++fpr_to_gpr (v8qi q0) [[arm::streaming_compatible]] ++{ ++ register v8qi x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c +new file mode 100644 +index 000000000..5b5346cf4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c +@@ -0,0 +1,13 @@ ++// { dg-options "" } ++ ++#include <arm_neon.h> ++ ++#pragma GCC target "+nosme" ++ ++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } ++ ++int32x4_t ++foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]] ++{ ++ return vhaddq_s32 (x, y); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c +new file mode 100644 +index 000000000..2092c4471 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c +@@ -0,0 +1,11 @@ ++// { dg-options "" } ++ ++#include <arm_neon.h> ++ ++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } ++ ++int32x4_t ++foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]] ++{ ++ return vhaddq_s32 (x, y); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c +new file mode 100644 +index 000000000..36794e5b0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c +@@ -0,0 +1,11 @@ ++// { dg-options "" } ++ ++#include <arm_neon.h> ++ ++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } ++ ++int32x4_t ++foo (int32x4_t x, int32x4_t y) [[arm::streaming]] ++{ ++ return vhaddq_s32 (x, y); ++} +-- +2.33.0 + |