Diffstat (limited to '0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch')
-rw-r--r-- | 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch | 15955
1 file changed, 15955 insertions, 0 deletions
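The patch below installs the new <arm_sme.h> header and wires up two new -march extensions, +sme-i16i64 and +sme-f64f64, each of which defines a matching ACLE feature macro (see the aarch64-c.cc hunk). As a rough sketch of how code built against a patched GCC would detect the features (the exact -march spelling is an assumption, not part of the patch):

    /* Hypothetical usage sketch; compile with e.g. -march=armv9-a+sme-i16i64.
       The macros are the ones this patch defines in aarch64-c.cc.  */
    #include <arm_sme.h>

    #ifndef __ARM_FEATURE_SME
    #error "SME not enabled; add +sme (or a superset) to -march"
    #endif

    #ifdef __ARM_FEATURE_SME_I16I64   /* defined for +sme-i16i64 */
    /* 16-bit integer outer products into 64-bit ZA tiles (SMOPA/SMOPS
       etc. on .d tiles) are available.  */
    #endif

    #ifdef __ARM_FEATURE_SME_F64F64   /* defined for +sme-f64f64 */
    /* Double-precision FMOPA/FMOPS on .d tiles are available.  */
    #endif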
diff --git a/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch b/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
new file mode 100644
index 0000000..b83e594
--- /dev/null
+++ b/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
@@ -0,0 +1,15955 @@
+From 6c651a11f8e68244c4c53ad7b29983f54a3bc737 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 112/157] [Backport][SME] aarch64: Add support for <arm_sme.h>
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4f6ab9537051e156d52bd8e9df40107ba6685895
+
+This adds support for the SME parts of arm_sme.h.
+
+gcc/
+	* doc/invoke.texi: Document +sme-i16i64 and +sme-f64f64.
+	* config.gcc (aarch64*-*-*): Add arm_sme.h to the list of headers
+	to install and aarch64-sve-builtins-sme.o to the list of objects
+	to build.
+	* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
+	or undefine TARGET_SME, TARGET_SME_I16I64 and TARGET_SME_F64F64.
+	(aarch64_pragma_aarch64): Handle arm_sme.h.
+	* config/aarch64/aarch64-option-extensions.def (sme-i16i64)
+	(sme-f64f64): New extensions.
+	* config/aarch64/aarch64-protos.h (aarch64_sme_vq_immediate)
+	(aarch64_addsvl_addspl_immediate_p, aarch64_output_addsvl_addspl)
+	(aarch64_output_sme_zero_za): Declare.
+	(aarch64_output_move_struct): Delete.
+	(aarch64_sme_ldr_vnum_offset): Declare.
+	(aarch64_sve::handle_arm_sme_h): Likewise.
+	* config/aarch64/aarch64.h (AARCH64_ISA_SM_ON): New macro.
+	(AARCH64_ISA_SME_I16I64, AARCH64_ISA_SME_F64F64): Likewise.
+	(TARGET_STREAMING, TARGET_STREAMING_SME): Likewise.
+	(TARGET_SME_I16I64, TARGET_SME_F64F64): Likewise.
+	* config/aarch64/aarch64.cc (aarch64_sve_rdvl_factor_p): Rename to...
+	(aarch64_sve_rdvl_addvl_factor_p): ...this.
+	(aarch64_sve_rdvl_immediate_p): Update accordingly.
+	(aarch64_rdsvl_immediate_p, aarch64_add_offset): Likewise.
+	(aarch64_sme_vq_immediate): Likewise.  Make public.
+	(aarch64_sve_addpl_factor_p): New function.
+	(aarch64_sve_addvl_addpl_immediate_p): Use
+	aarch64_sve_rdvl_addvl_factor_p and aarch64_sve_addpl_factor_p.
+	(aarch64_addsvl_addspl_immediate_p): New function.
+	(aarch64_output_addsvl_addspl): Likewise.
+	(aarch64_cannot_force_const_mem): Return true for RDSVL immediates.
+	(aarch64_classify_index): Handle .Q scaling for VNx1TImode.
+	(aarch64_classify_address): Likewise for vnum offsets.
+	(aarch64_output_sme_zero_za): New function.
+	(aarch64_sme_ldr_vnum_offset_p): Likewise.
+	* config/aarch64/predicates.md (aarch64_addsvl_addspl_immediate):
+	New predicate.
+	(aarch64_pluslong_operand): Include it for SME.
+	* config/aarch64/constraints.md (Ucj, Uav): New constraints.
+	* config/aarch64/iterators.md (VNx1TI_ONLY): New mode iterator.
+	(SME_ZA_I, SME_ZA_SDI, SME_ZA_SDF_I, SME_MOP_BHI): Likewise.
+	(SME_MOP_HSDF): Likewise.
+	(UNSPEC_SME_ADDHA, UNSPEC_SME_ADDVA, UNSPEC_SME_FMOPA)
+	(UNSPEC_SME_FMOPS, UNSPEC_SME_LD1_HOR, UNSPEC_SME_LD1_VER)
+	(UNSPEC_SME_READ_HOR, UNSPEC_SME_READ_VER, UNSPEC_SME_SMOPA)
+	(UNSPEC_SME_SMOPS, UNSPEC_SME_ST1_HOR, UNSPEC_SME_ST1_VER)
+	(UNSPEC_SME_SUMOPA, UNSPEC_SME_SUMOPS, UNSPEC_SME_UMOPA)
+	(UNSPEC_SME_UMOPS, UNSPEC_SME_USMOPA, UNSPEC_SME_USMOPS)
+	(UNSPEC_SME_WRITE_HOR, UNSPEC_SME_WRITE_VER): New unspecs.
+	(elem_bits): Handle x2 and x4 structure modes, plus VNx1TI.
+	(Vetype, Vesize, VPRED): Handle VNx1TI.
+	(b): New mode attribute.
+	(SME_LD1, SME_READ, SME_ST1, SME_WRITE, SME_BINARY_SDI, SME_INT_MOP)
+	(SME_FP_MOP): New int iterators.
+	(optab): Handle SME unspecs.
+	(hv): New int attribute.
+	* config/aarch64/aarch64.md (*add<mode>3_aarch64): Handle ADDSVL
+	and ADDSPL.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SME_LDR): New unspec.
+	(@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus)
+	(aarch64_sme_ldr0, @aarch64_sme_ldrn<mode>): New patterns.
+	(UNSPEC_SME_STR): New unspec.
+	(@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus)
+	(aarch64_sme_str0, @aarch64_sme_strn<mode>): New patterns.
+	(@aarch64_sme_<optab><v_int_container><mode>): Likewise.
+	(*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise.
+	(@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise.
+	(@aarch64_sme_<optab><v_int_container><mode>): Likewise.
+	(*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise.
+	(@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise.
+	(UNSPEC_SME_ZERO): New unspec.
+	(aarch64_sme_zero): New pattern.
+	(@aarch64_sme_<SME_BINARY_SDI:optab><mode>): Likewise.
+	(@aarch64_sme_<SME_INT_MOP:optab><mode>): Likewise.
+	(@aarch64_sme_<SME_FP_MOP:optab><mode>): Likewise.
+	* config/aarch64/aarch64-sve-builtins.def: Add ZA type suffixes.
+	Include aarch64-sve-builtins-sme.def.
+	(DEF_SME_ZA_FUNCTION): New macro.
+	* config/aarch64/aarch64-sve-builtins.h (CP_READ_ZA): New call
+	property.
+	(CP_WRITE_ZA): Likewise.
+	(PRED_za_m): New predication type.
+	(type_suffix_index): Handle DEF_SME_ZA_SUFFIX.
+	(type_suffix_info): Add vector_p and za_p fields.
+	(function_instance::num_za_tiles): New member function.
+	(function_builder::get_attributes): Add an aarch64_feature_flags
+	argument.
+	(function_expander::get_contiguous_base): Take a base argument
+	number, a vnum argument number, and an argument that indicates
+	whether the vnum parameter is a factor of the SME vector length
+	or the prevailing vector length.
+	(function_expander::add_integer_operand): Take a poly_int64.
+	(sve_switcher::sve_switcher): Take a base set of flags.
+	(sme_switcher): New class.
+	(scalar_types): Add a null entry for NUM_VECTOR_TYPES.
+	* config/aarch64/aarch64-sve-builtins.cc: Include
+	aarch64-sve-builtins-sme.h.
+	(pred_suffixes): Add an entry for PRED_za_m.
+	(type_suffixes): Initialize vector_p and za_p.  Handle ZA suffixes.
+	(TYPES_all_za, TYPES_d_za, TYPES_za_bhsd_data, TYPES_za_all_data)
+	(TYPES_za_s_integer, TYPES_za_d_integer, TYPES_mop_base)
+	(TYPES_mop_base_signed, TYPES_mop_base_unsigned, TYPES_mop_i16i64)
+	(TYPES_mop_i16i64_signed, TYPES_mop_i16i64_unsigned, TYPES_za): New
+	type suffix macros.
+	(preds_m, preds_za_m): New predication lists.
+	(function_groups): Handle DEF_SME_ZA_FUNCTION.
+	(scalar_types): Add an entry for NUM_VECTOR_TYPES.
+	(find_type_suffix_for_scalar_type): Check positively for vectors
+	rather than negatively for predicates.
+	(check_required_extensions): Handle PSTATE.SM and PSTATE.ZA
+	requirements.
+	(report_out_of_range): Handle the case where the minimum and
+	maximum are the same.
+	(function_instance::reads_global_state_p): Return true for functions
+	that read ZA.
+	(function_instance::modifies_global_state_p): Return true for functions
+	that write to ZA.
+	(sve_switcher::sve_switcher): Add a base flags argument.
+	(function_builder::get_name): Handle "__arm_" prefixes.
+	(add_attribute): Add an overload that takes a namespaces.
+	(add_shared_state_attribute): New function.
+	(function_builder::get_attributes): Take the required feature flags
+	as argument.  Add streaming and ZA attributes where appropriate.
+	(function_builder::add_unique_function): Update calls accordingly.
+	(function_resolver::check_gp_argument): Assert that the predication
+	isn't ZA _m predication.
+	(function_checker::function_checker): Don't bias the argument
+	number for ZA _m predication.
+	(function_expander::get_contiguous_base): Add arguments that
+	specify the base argument number, the vnum argument number,
+	and an argument that indicates whether the vnum parameter is
+	a factor of the SME vector length or the prevailing vector length.
+	Handle the SME case.
+	(function_expander::add_input_operand): Handle pmode_register_operand.
+	(function_expander::add_integer_operand): Take a poly_int64.
+	(init_builtins): Call handle_arm_sme_h for LTO.
+	(handle_arm_sve_h): Skip SME intrinsics.
+	(handle_arm_sme_h): New function.
+	* config/aarch64/aarch64-sve-builtins-functions.h
+	(read_write_za, write_za): New classes.
+	(unspec_based_sme_function, za_arith_function): New using aliases.
+	(quiet_za_arith_function): Likewise.
+	* config/aarch64/aarch64-sve-builtins-shapes.h
+	(binary_za_int_m, binary_za_m, binary_za_uint_m, bool_inherent)
+	(inherent_za, inherent_mask_za, ldr_za, load_za, read_za_m, store_za)
+	(str_za, unary_za_m, write_za_m): Declare.
+	* config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication):
+	Expect za_m functions to have an existing governing predicate.
+	(binary_za_m_base, binary_za_int_m_def, binary_za_m_def): New classes.
+	(binary_za_uint_m_def, bool_inherent_def, inherent_za_def): Likewise.
+	(inherent_mask_za_def, ldr_za_def, load_za_def, read_za_m_def)
+	(store_za_def, str_za_def, unary_za_m_def, write_za_m_def): Likewise.
+	* config/aarch64/arm_sme.h: New file.
+	* config/aarch64/aarch64-sve-builtins-sme.h: Likewise.
+	* config/aarch64/aarch64-sve-builtins-sme.cc: Likewise.
+	* config/aarch64/aarch64-sve-builtins-sme.def: Likewise.
+	* config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on
+	aarch64-sve-builtins-sme.def and aarch64-sve-builtins-sme.h.
+	(aarch64-sve-builtins-sme.o): New rule.
+
+gcc/testsuite/
+	* lib/target-supports.exp: Add sme and sme-i16i64 features.
+	* gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME*
+	macros.
+	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Allow functions
+	to be marked as __arm_streaming, __arm_streaming_compatible, and
+	__arm_inout("za").
+	* g++.target/aarch64/sve/acle/general-c++/func_redef_4.c: Mark the
+	function as __arm_streaming_compatible.
+	* g++.target/aarch64/sve/acle/general-c++/func_redef_5.c: Likewise.
+	* g++.target/aarch64/sve/acle/general-c++/func_redef_7.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/func_redef_4.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/func_redef_5.c: Likewise.
+	* g++.target/aarch64/sme/aarch64-sme-acle-asm.exp: New test harness.
+	* gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c: New test.
+	* gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c: Likewise.
+	* gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c: Likewise.
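To make the ChangeLog concrete, here is a hedged sketch of the C-level interface it describes: functions gain the SME keywords __arm_streaming and __arm_inout("za"), and the ZA intrinsics take a constant tile number plus governing predicates, matching the binary_za_m shape ("_,su64,vp,vp,t1,t1") added below. The function and helper names follow the ACLE; treat the snippet as illustrative, not as part of the patch.

    #include <arm_sme.h>

    /* Accumulate outer products of columns of A and rows of B into
       ZA tile 0.  __arm_streaming: body runs with PSTATE.SM set;
       __arm_inout("za"): reads and writes ZA state (the CP_READ_ZA
       and CP_WRITE_ZA call properties in the builtins code).  */
    void accumulate (const float *a, const float *b, uint32_t n)
      __arm_streaming __arm_inout("za")
    {
      svbool_t all = svptrue_b32 ();
      for (uint32_t i = 0; i < n; i += svcntw ())
        {
          svfloat32_t za_col = svld1 (all, a + i);
          svfloat32_t zb_row = svld1 (all, b + i);
          /* binary_za_m shape: (tile, pn, pm, zn, zm); the tile must be
             an integer constant, range-checked via num_za_tiles.  */
          svmopa_za32_f32_m (0, all, all, za_col, zb_row);
        }
    }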
+---
+ gcc/config.gcc | 4 +-
+ gcc/config/aarch64/aarch64-c.cc | 6 +
+ .../aarch64/aarch64-option-extensions.def | 4 +
+ gcc/config/aarch64/aarch64-protos.h | 8 +-
+ gcc/config/aarch64/aarch64-sme.md | 373 +++++++++++++++
+ .../aarch64/aarch64-sve-builtins-functions.h | 64 +++
+ .../aarch64/aarch64-sve-builtins-shapes.cc | 306 +++++++++++-
+ .../aarch64/aarch64-sve-builtins-shapes.h | 13 +
+ .../aarch64/aarch64-sve-builtins-sme.cc | 412 +++++++++++++++++
+ .../aarch64/aarch64-sve-builtins-sme.def | 76 +++
+ gcc/config/aarch64/aarch64-sve-builtins-sme.h | 57 +++
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 336 ++++++++++++--
+ gcc/config/aarch64/aarch64-sve-builtins.def | 28 ++
+ gcc/config/aarch64/aarch64-sve-builtins.h | 46 +-
+ gcc/config/aarch64/aarch64.cc | 140 +++++-
+ gcc/config/aarch64/aarch64.h | 15 +
+ gcc/config/aarch64/aarch64.md | 13 +-
+ gcc/config/aarch64/arm_sme.h | 45 ++
+ gcc/config/aarch64/constraints.md | 9 +
+ gcc/config/aarch64/iterators.md | 94 +++-
+ gcc/config/aarch64/predicates.md | 8 +-
+ gcc/config/aarch64/t-aarch64 | 17 +-
+ gcc/doc/invoke.texi | 4 +
+ .../aarch64/sme/aarch64-sme-acle-asm.exp | 82 ++++
+ .../sve/acle/general-c++/func_redef_4.c | 3 +-
+ .../sve/acle/general-c++/func_redef_5.c | 1 +
+ .../sve/acle/general-c++/func_redef_7.c | 1 +
+ .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 38 ++
+ .../aarch64/sme/aarch64-sme-acle-asm.exp | 81 ++++
+ .../aarch64/sme/acle-asm/addha_za32.c | 48 ++
+ .../aarch64/sme/acle-asm/addha_za64.c | 50 ++
+ .../aarch64/sme/acle-asm/addva_za32.c | 48 ++
+ .../aarch64/sme/acle-asm/addva_za64.c | 50 ++
+ .../aarch64/sme/acle-asm/arm_has_sme_sc.c | 25 +
+ .../sme/acle-asm/arm_in_streaming_mode_ns.c | 11 +
+ .../sme/acle-asm/arm_in_streaming_mode_s.c | 11 +
+ .../sme/acle-asm/arm_in_streaming_mode_sc.c | 26 ++
+ .../gcc.target/aarch64/sme/acle-asm/cntsb_s.c | 310 +++++++++++++
+ .../aarch64/sme/acle-asm/cntsb_sc.c | 12 +
+ .../gcc.target/aarch64/sme/acle-asm/cntsd_s.c | 277 +++++++++++
+ .../aarch64/sme/acle-asm/cntsd_sc.c | 13 +
+ .../gcc.target/aarch64/sme/acle-asm/cntsh_s.c | 279 +++++++++++
+ .../aarch64/sme/acle-asm/cntsh_sc.c | 13 +
+ .../gcc.target/aarch64/sme/acle-asm/cntsw_s.c | 278 +++++++++++
+ .../aarch64/sme/acle-asm/cntsw_sc.c | 13 +
+ .../aarch64/sme/acle-asm/ld1_hor_vnum_za128.c | 77 ++++
+ .../aarch64/sme/acle-asm/ld1_hor_vnum_za16.c | 123 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_vnum_za32.c | 123 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_vnum_za64.c | 112 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_vnum_za8.c | 112 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_za128.c | 83 ++++
+ .../aarch64/sme/acle-asm/ld1_hor_za16.c | 126 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_za32.c | 125 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_za64.c | 105 +++++
+ .../aarch64/sme/acle-asm/ld1_hor_za8.c | 95 ++++
+ .../aarch64/sme/acle-asm/ld1_ver_vnum_za128.c | 77 ++++
+ .../aarch64/sme/acle-asm/ld1_ver_vnum_za16.c | 123 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_vnum_za32.c | 123 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_vnum_za64.c | 112 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_vnum_za8.c | 112 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_za128.c | 83 ++++
+ .../aarch64/sme/acle-asm/ld1_ver_za16.c | 126 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_za32.c | 125 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_za64.c | 105 +++++
+ .../aarch64/sme/acle-asm/ld1_ver_za8.c | 95 ++++
+ .../aarch64/sme/acle-asm/ldr_vnum_za_s.c | 147 ++++++
+ .../aarch64/sme/acle-asm/ldr_vnum_za_sc.c | 148 ++++++
+ .../aarch64/sme/acle-asm/ldr_za_s.c | 124 +++++
+ .../aarch64/sme/acle-asm/ldr_za_sc.c | 71 +++
+ .../aarch64/sme/acle-asm/mopa_za32.c | 102 ++++
+ .../aarch64/sme/acle-asm/mopa_za64.c | 70 +++
+ .../aarch64/sme/acle-asm/mops_za32.c | 102 ++++
+ .../aarch64/sme/acle-asm/mops_za64.c | 70 +++
+ .../aarch64/sme/acle-asm/read_hor_za128.c | 435 ++++++++++++++++++
+ .../aarch64/sme/acle-asm/read_hor_za16.c | 207 +++++++++
+ .../aarch64/sme/acle-asm/read_hor_za32.c | 196 ++++++++
+ .../aarch64/sme/acle-asm/read_hor_za64.c | 186 ++++++++
+ .../aarch64/sme/acle-asm/read_hor_za8.c | 125 +++++
+ .../aarch64/sme/acle-asm/read_ver_za128.c | 435 ++++++++++++++++++
+ .../aarch64/sme/acle-asm/read_ver_za16.c | 207 +++++++++
+ .../aarch64/sme/acle-asm/read_ver_za32.c | 196 ++++++++
+ .../aarch64/sme/acle-asm/read_ver_za64.c | 186 ++++++++
+ .../aarch64/sme/acle-asm/read_ver_za8.c | 125 +++++
+ .../aarch64/sme/acle-asm/st1_hor_vnum_za128.c | 77 ++++
+ .../aarch64/sme/acle-asm/st1_hor_vnum_za16.c | 123 +++++
+ .../aarch64/sme/acle-asm/st1_hor_vnum_za32.c | 123 +++++
+ .../aarch64/sme/acle-asm/st1_hor_vnum_za64.c | 112 +++++
+ .../aarch64/sme/acle-asm/st1_hor_vnum_za8.c | 112 +++++
+ .../aarch64/sme/acle-asm/st1_hor_za128.c | 83 ++++
+ .../aarch64/sme/acle-asm/st1_hor_za16.c | 126 +++++
+ .../aarch64/sme/acle-asm/st1_hor_za32.c | 125 +++++
+ .../aarch64/sme/acle-asm/st1_hor_za64.c | 105 +++++
+ .../aarch64/sme/acle-asm/st1_hor_za8.c | 95 ++++
+ .../aarch64/sme/acle-asm/st1_ver_vnum_za128.c | 77 ++++
+ .../aarch64/sme/acle-asm/st1_ver_vnum_za16.c | 123 +++++
+ .../aarch64/sme/acle-asm/st1_ver_vnum_za32.c | 123 +++++
+ .../aarch64/sme/acle-asm/st1_ver_vnum_za64.c | 112 +++++
+ .../aarch64/sme/acle-asm/st1_ver_vnum_za8.c | 112 +++++
+ .../aarch64/sme/acle-asm/st1_ver_za128.c | 83 ++++
+ .../aarch64/sme/acle-asm/st1_ver_za16.c | 126 +++++
+ .../aarch64/sme/acle-asm/st1_ver_za32.c | 125 +++++
+ .../aarch64/sme/acle-asm/st1_ver_za64.c | 105 +++++
+ .../aarch64/sme/acle-asm/st1_ver_za8.c | 95 ++++
+ .../aarch64/sme/acle-asm/str_vnum_za_s.c | 147 ++++++
+ .../aarch64/sme/acle-asm/str_vnum_za_sc.c | 148 ++++++
+ .../aarch64/sme/acle-asm/str_za_s.c | 124 +++++
+ .../aarch64/sme/acle-asm/str_za_sc.c | 71 +++
+ .../aarch64/sme/acle-asm/sumopa_za32.c | 30 ++
+ .../aarch64/sme/acle-asm/sumopa_za64.c | 32 ++
+ .../aarch64/sme/acle-asm/sumops_za32.c | 30 ++
+ .../aarch64/sme/acle-asm/sumops_za64.c | 32 ++
+ .../aarch64/sme/acle-asm/test_sme_acle.h | 62 +++
+ .../aarch64/sme/acle-asm/undef_za.c | 33 ++
+ .../aarch64/sme/acle-asm/usmopa_za32.c | 30 ++
+ .../aarch64/sme/acle-asm/usmopa_za64.c | 32 ++
+ .../aarch64/sme/acle-asm/usmops_za32.c | 30 ++
+ .../aarch64/sme/acle-asm/usmops_za64.c | 32 ++
+ .../aarch64/sme/acle-asm/write_hor_za128.c | 193 ++++++++
+ .../aarch64/sme/acle-asm/write_hor_za16.c | 133 ++++++
+ .../aarch64/sme/acle-asm/write_hor_za32.c | 143 ++++++
+ .../aarch64/sme/acle-asm/write_hor_za64.c | 133 ++++++
+ .../aarch64/sme/acle-asm/write_hor_za8.c | 93 ++++
+ .../aarch64/sme/acle-asm/write_ver_za128.c | 193 ++++++++
+ .../aarch64/sme/acle-asm/write_ver_za16.c | 133 ++++++
+ .../aarch64/sme/acle-asm/write_ver_za32.c | 143 ++++++
+ .../aarch64/sme/acle-asm/write_ver_za64.c | 133 ++++++
+ .../aarch64/sme/acle-asm/write_ver_za8.c | 93 ++++
+ .../aarch64/sme/acle-asm/zero_mask_za.c | 130 ++++++
+ .../gcc.target/aarch64/sme/acle-asm/zero_za.c | 11 +
+ .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +-
+ .../sve/acle/general-c/binary_za_int_m_1.c | 50 ++
+ .../sve/acle/general-c/binary_za_m_1.c | 49 ++
+ .../sve/acle/general-c/binary_za_m_2.c | 11 +
+ .../sve/acle/general-c/binary_za_uint_m_1.c | 50 ++
+ .../aarch64/sve/acle/general-c/func_redef_4.c | 3 +-
+ .../aarch64/sve/acle/general-c/func_redef_5.c | 1 +
+ .../aarch64/sve/acle/general-c/read_za_m_1.c | 48 ++
+ .../aarch64/sve/acle/general-c/unary_za_m_1.c | 49 ++
+ .../aarch64/sve/acle/general-c/write_za_m_1.c | 48 ++
+ gcc/testsuite/lib/target-supports.exp | 3 +-
+ 140 files changed, 13816 insertions(+), 78 deletions(-)
+ create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+ create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.def
+ create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.h
+ create mode 100644 gcc/config/aarch64/arm_sme.h
+ create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index da66603cd..19b21a280 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -325,11 +325,11 @@ m32c*-*-*)
+ 	;;
+ aarch64*-*-*)
+ 	cpu_type=aarch64
+-	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
++	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h"
+ 	c_target_objs="aarch64-c.o"
+ 	cxx_target_objs="aarch64-c.o"
+ 	d_target_objs="aarch64-d.o"
+-	extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
++	extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
+ 	target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
+ 	target_has_targetm_common=yes
+ 	;;
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index 76c20848f..cb8a6c2fc 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -250,6 +250,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
+ 			"__ARM_FEATURE_LS64", pfile);
+   aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
+ 
++  aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
++  aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
++  aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
++
+   /* Not for ACLE, but required to keep "float.h" correct if we switch
+      target between implementations that do or do not support ARMv8.2-A
+      16-bit floating-point extensions.  */
+@@ -347,6 +351,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
+   const char *name = TREE_STRING_POINTER (x);
+   if (strcmp (name, "arm_sve.h") == 0)
+     aarch64_sve::handle_arm_sve_h ();
++  else if (strcmp (name, "arm_sme.h") == 0)
++    aarch64_sve::handle_arm_sme_h ();
+   else if (strcmp (name, "arm_neon.h") == 0)
+     handle_arm_neon_h ();
+   else if (strcmp (name, "arm_acle.h") == 0)
+diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
+index faee64a79..98854dbce 100644
+--- a/gcc/config/aarch64/aarch64-option-extensions.def
++++ b/gcc/config/aarch64/aarch64-option-extensions.def
+@@ -151,4 +151,8 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
+ 
+ AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
+ 
++AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "")
++
++AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "")
++
+ #undef AARCH64_OPT_EXTENSION
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 0883ddd1a..81900fa83 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -809,7 +809,11 @@ bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
+ int aarch64_add_offset_temporaries (rtx);
+ void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
+ bool aarch64_rdsvl_immediate_p (const_rtx);
++rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT,
++			      aarch64_feature_flags);
+ char *aarch64_output_rdsvl (const_rtx);
++bool aarch64_addsvl_addspl_immediate_p (const_rtx);
++char *aarch64_output_addsvl_addspl (rtx);
+ bool aarch64_mov_operand_p (rtx, machine_mode);
+ rtx aarch64_reverse_mask (machine_mode, unsigned int);
+ bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
+@@ -853,6 +857,7 @@ int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+ bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT);
+ bool aarch64_use_return_insn_p (void);
+ const char *aarch64_output_casesi (rtx *);
++const char *aarch64_output_sme_zero_za (rtx);
+ 
+ arm_pcs aarch64_tlsdesc_abi_id ();
+ enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT);
+@@ -867,7 +872,6 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
+ int aarch64_vec_fpconst_pow_of_2 (rtx);
+ rtx aarch64_eh_return_handler_rtx (void);
+ rtx aarch64_mask_from_zextract_ops (rtx, rtx);
+-const char *aarch64_output_move_struct (rtx *operands);
+ rtx aarch64_return_addr_rtx (void);
+ rtx aarch64_return_addr (int, rtx);
+ rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
+@@ -881,6 +885,7 @@ bool aarch64_sve_ldnf1_operand_p (rtx);
+ bool aarch64_sve_ldr_operand_p (rtx);
+ bool aarch64_sve_prefetch_operand_p (rtx, machine_mode);
+ bool aarch64_sve_struct_memory_operand_p (rtx);
++bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx);
+ rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
+ rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int);
+ bool aarch64_stepped_int_parallel_p (rtx, int);
+@@ -1000,6 +1005,7 @@ void handle_arm_neon_h (void);
+ namespace aarch64_sve {
+   void init_builtins ();
+   void handle_arm_sve_h ();
++  void handle_arm_sme_h ();
+   tree builtin_decl (unsigned, bool);
+   bool builtin_type_p (const_tree);
+   bool builtin_type_p (const_tree, unsigned int *, unsigned int *);
+diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
+index d4973098e..da0745f65 100644
+--- a/gcc/config/aarch64/aarch64-sme.md
++++ b/gcc/config/aarch64/aarch64-sme.md
+@@ -24,6 +24,19 @@
+ ;; ---- Test current state
+ ;; ---- PSTATE.SM management
+ ;; ---- PSTATE.ZA management
++;;
++;; == Loads, stores and moves
++;; ---- Single-vector loads
++;; ---- Single-vector stores
++;; ---- Single-vector moves
++;; ---- Zeroing
++;;
++;; == Binary arithmetic
++;; ---- Binary arithmetic on ZA tile
++;;
++;; == Ternary arithmetic
++;; ---- [INT] Sum of outer products
++;; ---- [FP] Sum of outer products
+ 
+ ;; =========================================================================
+ ;; == State management
+@@ -456,3 +469,363 @@
+     DONE;
+   }
+ )
++
++;; =========================================================================
++;; == Loads, stores and moves
++;; =========================================================================
++
++;; -------------------------------------------------------------------------
++;; ---- Single-vector loads
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - LD1
++;; - LDR
++;; -------------------------------------------------------------------------
++
++(define_c_enum "unspec" [
++  UNSPEC_SME_LDR
++])
++
++(define_insn "@aarch64_sme_<optab><mode>"
++  [(set (reg:SME_ZA_I ZA_REGNUM)
++	(unspec:SME_ZA_I
++	  [(reg:SME_ZA_I ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:SI 1 "register_operand" "Ucj")
++	   (match_operand:<VPRED> 2 "register_operand" "Upl")
++	   (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")]
++	  SME_LD1))]
++  "TARGET_STREAMING_SME"
++  "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3"
++)
++
++(define_insn "@aarch64_sme_<optab><mode>_plus"
++  [(set (reg:SME_ZA_I ZA_REGNUM)
++	(unspec:SME_ZA_I
++	  [(reg:SME_ZA_I ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
++		    (match_operand:SI 2 "const_int_operand"))
++	   (match_operand:<VPRED> 3 "register_operand" "Upl")
++	   (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")]
++	  SME_LD1))]
++  "TARGET_STREAMING_SME
++   && UINTVAL (operands[2]) < 128 / <elem_bits>"
++  "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4"
++)
++
++(define_insn "aarch64_sme_ldr0"
++  [(set (reg:VNx16QI ZA_REGNUM)
++	(unspec:VNx16QI
++	  [(reg:VNx16QI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:SI 0 "register_operand" "Ucj")
++	   (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))]
++	  UNSPEC_SME_LDR))]
++  "TARGET_SME"
++  "ldr\tza[%w0, 0], [%1, #0, mul vl]"
++)
++
++(define_insn "@aarch64_sme_ldrn<mode>"
++  [(set (reg:VNx16QI ZA_REGNUM)
++	(unspec:VNx16QI
++	  [(reg:VNx16QI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
++		    (match_operand:SI 1 "const_int_operand"))
++	   (mem:VNx16QI
++	     (plus:P (match_operand:P 2 "register_operand" "rk")
++		     (match_operand:P 3 "aarch64_mov_operand")))]
++	  UNSPEC_SME_LDR))]
++  "TARGET_SME
++   && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
++  "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
++)
++
++;; -------------------------------------------------------------------------
++;; ---- Single-vector stores
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - ST1
++;; - STR
++;; -------------------------------------------------------------------------
++
++(define_c_enum "unspec" [
++  UNSPEC_SME_STR
++])
++
++(define_insn "@aarch64_sme_<optab><mode>"
++  [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
++	(unspec:SME_ZA_I
++	  [(reg:SME_ZA_I ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_dup 0)
++	   (match_operand:DI 1 "const_int_operand")
++	   (match_operand:SI 2 "register_operand" "Ucj")
++	   (match_operand:<VPRED> 3 "register_operand" "Upl")]
++	  SME_ST1))]
++  "TARGET_STREAMING_SME"
++  "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0"
++)
++
++(define_insn "@aarch64_sme_<optab><mode>_plus"
++  [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
++	(unspec:SME_ZA_I
++	  [(reg:SME_ZA_I ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_dup 0)
++	   (match_operand:DI 1 "const_int_operand")
++	   (plus:SI (match_operand:SI 2 "register_operand" "Ucj")
++		    (match_operand:SI 3 "const_int_operand"))
++	   (match_operand:<VPRED> 4 "register_operand" "Upl")]
++	  SME_ST1))]
++  "TARGET_STREAMING_SME
++   && UINTVAL (operands[3]) < 128 / <elem_bits>"
++  "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0"
++)
++
++(define_insn "aarch64_sme_str0"
++  [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))
++	(unspec:VNx16QI
++	  [(reg:VNx16QI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (mem:VNx16QI (match_dup 1))
++	   (match_operand:SI 0 "register_operand" "Ucj")]
++	  UNSPEC_SME_STR))]
++  "TARGET_SME"
++  "str\tza[%w0, 0], [%1, #0, mul vl]"
++)
++
++(define_insn "@aarch64_sme_strn<mode>"
++  [(set (mem:VNx16QI
++	  (plus:P (match_operand:P 2 "register_operand" "rk")
++		  (match_operand:P 3 "aarch64_mov_operand")))
++	(unspec:VNx16QI
++	  [(reg:VNx16QI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3)))
++	   (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
++		    (match_operand:SI 1 "const_int_operand"))]
++	  UNSPEC_SME_STR))]
++  "TARGET_SME
++   && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
++  "str\tza[%w0, %1], [%2, #%1, mul vl]"
++)
++
++;; -------------------------------------------------------------------------
++;; ---- Single-vector moves
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - MOVA
++;; -------------------------------------------------------------------------
++
++(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
++  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
++	(unspec:SVE_FULL
++	  [(reg:<V_INT_CONTAINER> ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:SVE_FULL 1 "register_operand" "0")
++	   (match_operand:<VPRED> 2 "register_operand" "Upl")
++	   (match_operand:DI 3 "const_int_operand")
++	   (match_operand:SI 4 "register_operand" "Ucj")]
++	  SME_READ))]
++  "TARGET_STREAMING_SME"
++  "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]"
++)
++
++(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
++  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
++	(unspec:SVE_FULL
++	  [(reg:<V_INT_CONTAINER> ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:SVE_FULL 1 "register_operand" "0")
++	   (match_operand:<VPRED> 2 "register_operand" "Upl")
++	   (match_operand:DI 3 "const_int_operand")
++	   (plus:SI (match_operand:SI 4 "register_operand" "Ucj")
++		    (match_operand:SI 5 "const_int_operand"))]
++	  SME_READ))]
++  "TARGET_STREAMING_SME
++   && UINTVAL (operands[5]) < 128 / <elem_bits>"
++  "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]"
++)
++
++(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
++  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
++	(unspec:SVE_FULL
++	  [(reg:VNx1TI_ONLY ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:SVE_FULL 1 "register_operand" "0")
++	   (match_operand:VNx2BI 2 "register_operand" "Upl")
++	   (match_operand:DI 3 "const_int_operand")
++	   (match_operand:SI 4 "register_operand" "Ucj")]
++	  SME_READ))]
++  "TARGET_STREAMING_SME"
++  "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
++)
++
++(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
++  [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
++	(unspec:<V_INT_CONTAINER>
++	  [(reg:SVE_FULL ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:SI 1 "register_operand" "Ucj")
++	   (match_operand:<VPRED> 2 "register_operand" "Upl")
++	   (match_operand:SVE_FULL 3 "register_operand" "w")]
++	  SME_WRITE))]
++  "TARGET_STREAMING_SME"
++  "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>"
++)
++
++(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
++  [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
++	(unspec:<V_INT_CONTAINER>
++	  [(reg:SVE_FULL ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
++		    (match_operand:SI 2 "const_int_operand"))
++	   (match_operand:<VPRED> 3 "register_operand" "Upl")
++	   (match_operand:SVE_FULL 4 "register_operand" "w")]
++	  SME_WRITE))]
++  "TARGET_STREAMING_SME
++   && UINTVAL (operands[2]) < 128 / <elem_bits>"
++  "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>"
++)
++
++(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
++  [(set (reg:VNx1TI_ONLY ZA_REGNUM)
++	(unspec:VNx1TI_ONLY
++	  [(reg:VNx1TI_ONLY ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:SI 1 "register_operand" "Ucj")
++	   (match_operand:VNx2BI 2 "register_operand" "Upl")
++	   (match_operand:SVE_FULL 3 "register_operand" "w")]
++	  SME_WRITE))]
++  "TARGET_STREAMING_SME"
++  "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
++)
++
++;; -------------------------------------------------------------------------
++;; ---- Zeroing
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - ZERO
++;; -------------------------------------------------------------------------
++
++(define_c_enum "unspec" [UNSPEC_SME_ZERO])
++
++(define_insn "aarch64_sme_zero_za"
++  [(set (reg:VNx16QI ZA_REGNUM)
++	(unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM)
++			 (reg:DI SME_STATE_REGNUM)
++			 (match_operand:DI 0 "const_int_operand")]
++			UNSPEC_SME_ZERO))]
++  "TARGET_SME"
++  {
++    return aarch64_output_sme_zero_za (operands[0]);
++  }
++)
++
++;; =========================================================================
++;; == Binary arithmetic
++;; =========================================================================
++
++;; -------------------------------------------------------------------------
++;; ---- Binary arithmetic on ZA tile
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - ADDHA
++;; - ADDVA
++;; -------------------------------------------------------------------------
++
++(define_insn "@aarch64_sme_<optab><mode>"
++  [(set (reg:SME_ZA_SDI ZA_REGNUM)
++	(unspec:SME_ZA_SDI
++	  [(reg:SME_ZA_SDI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:<VPRED> 1 "register_operand" "Upl")
++	   (match_operand:<VPRED> 2 "register_operand" "Upl")
++	   (match_operand:SME_ZA_SDI 3 "register_operand" "w")]
++	  SME_BINARY_SDI))]
++  "TARGET_STREAMING_SME"
++  "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
++)
++
++;; =========================================================================
++;; == Ternary arithmetic
++;; =========================================================================
++
++;; -------------------------------------------------------------------------
++;; ---- [INT] Sum of outer products
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - SMOPA
++;; - SMOPS
++;; - SUMOPA
++;; - SUMOPS
++;; - UMOPA
++;; - UMOPS
++;; - USMOPA
++;; - USMOPS
++;; -------------------------------------------------------------------------
++
++(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
++  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
++	(unspec:VNx4SI_ONLY
++	  [(reg:VNx4SI_ONLY ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
++	   (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
++	   (match_operand:VNx16QI_ONLY 3 "register_operand" "w")
++	   (match_operand:VNx16QI_ONLY 4 "register_operand" "w")]
++	  SME_INT_MOP))]
++  "TARGET_STREAMING_SME"
++  "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b"
++)
++
++(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
++  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
++	(unspec:VNx2DI_ONLY
++	  [(reg:VNx2DI_ONLY ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl")
++	   (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl")
++	   (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
++	   (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
++	  SME_INT_MOP))]
++  "TARGET_STREAMING_SME && TARGET_SME_I16I64"
++  "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
++)
++
++;; -------------------------------------------------------------------------
++;; ---- [FP] Sum of outer products
++;; -------------------------------------------------------------------------
++;; Includes:
++;; - BFMOPA
++;; - BFMOPS
++;; - FMOPA
++;; - FMOPS
++;; -------------------------------------------------------------------------
++
++(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
++  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
++	(unspec:SME_ZA_SDF_I
++	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand:DI 0 "const_int_operand")
++	   (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl")
++	   (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl")
++	   (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
++	   (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
++	  SME_FP_MOP))]
++  "TARGET_STREAMING_SME
++   && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
++  "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
++)
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+index f5fa4030c..9dfce5c0e 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+@@ -50,6 +50,27 @@ public:
+   }
+ };
+ 
++/* Wrap T, which is derived from function_base, and indicate that it
++   additionally has the call properties in PROPERTIES.  */
++template<typename T, unsigned int PROPERTIES>
++class add_call_properties : public T
++{
++public:
++  using T::T;
++
++  unsigned int
++  call_properties (const function_instance &fi) const override
++  {
++    return T::call_properties (fi) | PROPERTIES;
++  }
++};
++
++template<typename T>
++using read_write_za = add_call_properties<T, CP_READ_ZA | CP_WRITE_ZA>;
++
++template<typename T>
++using write_za = add_call_properties<T, CP_WRITE_ZA>;
++
+ /* A function_base that sometimes or always operates on tuples of
+    vectors.  */
+ class multi_vector_function : public function_base
+@@ -383,6 +404,49 @@ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
+   unspec_based_sub_function;
+ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
+   unspec_based_sub_lane_function;
+ 
++/* General SME unspec-based functions, parameterized on the vector mode.  */
++class sme_1mode_function : public read_write_za<unspec_based_function_base>
++{
++public:
++  using parent = read_write_za<unspec_based_function_base>;
++
++  CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint,
++				int unspec_for_fp)
++    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1)
++  {}
++
++  rtx
++  expand (function_expander &e) const override
++  {
++    auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
++    return e.use_exact_insn (icode);
++  }
++};
++
++/* General SME unspec-based functions, parameterized on both the ZA mode
++   and the vector mode.  */
++template<insn_code (*CODE) (int, machine_mode, machine_mode)>
++class sme_2mode_function_t : public read_write_za<unspec_based_function_base>
++{
++public:
++  using parent = read_write_za<unspec_based_function_base>;
++
++  CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint,
++				  int unspec_for_fp)
++    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1)
++  {}
++
++  rtx
++  expand (function_expander &e) const override
++  {
++    insn_code icode = CODE (unspec_for (e), e.vector_mode (0),
++			    e.tuple_mode (1));
++    return e.use_exact_insn (icode);
++  }
++};
++
++using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme>;
++
+ /* A function that acts like unspec_based_function_exact_insn<INT_CODE>
+    when operating on integers, but that expands to an (fma ...)-style
+    aarch64_sve* operation when applied to floats.  */
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index c536949ba..bdde849c8 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -59,7 +59,10 @@ static void
+ apply_predication (const function_instance &instance, tree return_type,
+ 		   vec<tree> &argument_types)
+ {
+-  if (instance.pred != PRED_none)
++  /* There are currently no SME ZA instructions that have both merging and
++     unpredicated forms, so for simplicity, the predicates are always included
++     in the original format string.  */
++  if (instance.pred != PRED_none && instance.pred != PRED_za_m)
+     {
+       argument_types.quick_insert (0, get_svbool_t ());
+       /* For unary merge operations, the first argument is a vector with
+@@ -589,6 +592,33 @@ struct binary_imm_long_base : public overloaded_base<0>
+   }
+ };
+ 
++/* Base class for binary_za_m and similar shapes.  */
++template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS,
++	 unsigned int BITS = function_resolver::SAME_SIZE>
++struct binary_za_m_base : public overloaded_base<1>
++{
++  tree
++  resolve (function_resolver &r) const override
++  {
++    type_suffix_index type;
++    if (!r.check_num_arguments (5)
++	|| !r.require_integer_immediate (0)
++	|| !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
++	|| !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
++	|| (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES
++	|| !r.require_derived_vector_type (4, 3, type, TCLASS, BITS))
++      return error_mark_node;
++
++    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++
+ /* Base class for inc_dec and inc_dec_pat.  */
+ struct inc_dec_base : public overloaded_base<0>
+ {
+@@ -1576,6 +1606,68 @@
+ };
+ SHAPE (binary_wide_opt_n)
+ 
++/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		       sv<t1:int>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_int_m_def : public binary_za_m_base<TYPE_signed>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,vp,vp,t1,ts1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_int_m)
++
++/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		       sv<t1>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_m_def : public binary_za_m_base<>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    /* Allow the overloaded form to be specified seperately, with just
++       a single suffix.  This is necessary for the 64-bit SME MOP intrinsics,
++       which have some forms dependent on FEAT_SME_I16I64 and some forms
++       dependent on FEAT_SME_F64F64.  The resolver needs to be defined
++       for base SME.  */
++    if (group.types[0][1] != NUM_TYPE_SUFFIXES)
++      build_all (b, "_,su64,vp,vp,t1,t1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_m)
++
++/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		       sv<t1:uint>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_uint_m_def : public binary_za_m_base<TYPE_unsigned>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,vp,vp,t1,tu1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_uint_m)
++
++/* bool svfoo().  */
++struct bool_inherent_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "sp", group, MODE_none);
++  }
++};
++SHAPE (bool_inherent)
++
+ /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
+    <t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t).  */
+ struct clast_def : public overloaded_base<0>
+@@ -2055,6 +2147,51 @@ struct inherent_b_def : public overloaded_base<0>
+ };
+ SHAPE (inherent_b)
+ 
++/* void svfoo_t0().  */
++struct inherent_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_", group, MODE_none);
++  }
++};
++SHAPE (inherent_za)
++
++/* void svfoo_t0(uint64_t)
++
++   where the argument is an integer constant that specifies an 8-bit mask.  */
++struct inherent_mask_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_,su64", group, MODE_none);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, 255);
++  }
++};
++SHAPE (inherent_mask_za)
++
++/* void svfoo_t0(uint32_t, const void *)
++   void svfoo_vnum_t0(uint32_t, const void *, int64_t)
++
++   where the first argument is a variable ZA slice.  */
++struct ldr_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_,su32,al", group, MODE_none);
++    build_all (b, "_,su32,al,ss64", group, MODE_vnum);
++  }
++};
++SHAPE (ldr_za)
++
+ /* sv<t0>[xN]_t svfoo[_t0](const <t0>_t *)
+    sv<t0>[xN]_t svfoo_vnum[_t0](const <t0>_t *, int64_t).  */
+ struct load_def : public load_contiguous_base
+@@ -2265,6 +2402,27 @@ struct load_replicate_def : public load_contiguous_base
+ };
+ SHAPE (load_replicate)
+ 
++/* void svfoo_t0(uint64_t, uint32_t, svbool_t, const void *)
++   void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, const void *, int64_t)
++
++   where the first two fields form a (ZA tile, slice) pair.  */
++struct load_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_,su64,su32,vp,al", group, MODE_none);
++    build_all (b, "_,su64,su32,vp,al,ss64", group, MODE_vnum);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++SHAPE (load_za)
++
+ /* svbool_t svfoo(enum svpattern).  */
+ struct pattern_pred_def : public nonoverloaded_base
+ {
+@@ -2359,6 +2517,48 @@ struct rdffr_def : public nonoverloaded_base
+ };
+ SHAPE (rdffr)
+ 
++/* sv<t1>_t svfoo_t0[_t1](uint64_t, uint32_t)
++
++   where the first two fields form a (ZA tile, slice) pair.  */
++struct read_za_m_def : public overloaded_base<1>
++{
++  bool
++  has_merge_argument_p (const function_instance &, unsigned int) const override
++  {
++    return true;
++  }
++
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "t1,su64,su32", group, MODE_none);
++  }
++
++  tree
++  resolve (function_resolver &r) const override
++  {
++    gcc_assert (r.pred == PRED_m);
++    type_suffix_index type;
++    if (!r.check_num_arguments (4)
++	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
++	|| !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
++	|| !r.require_integer_immediate (2)
++	|| !r.require_scalar_type (3, "uint32_t"))
++      return error_mark_node;
++
++    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    gcc_assert (c.pred == PRED_m);
++    return c.require_immediate_range (1, 0, c.num_za_tiles () - 1);
++  }
++};
++SHAPE (read_za_m)
++
+ /* <t0>_t svfoo[_t0](sv<t0>_t).  */
+ struct reduction_def : public overloaded_base<0>
+ {
+@@ -2727,6 +2927,42 @@ struct store_scatter_offset_restricted_def : public store_scatter_base
+ };
+ SHAPE (store_scatter_offset_restricted)
+ 
++/* void svfoo_t0(uint64_t, uint32_t, svbool_t, void *)
++   void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, void *, int64_t)
++
++   where the first two fields form a (ZA tile, slice) pair.  */
++struct store_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_,su64,su32,vp,as", group, MODE_none);
++    build_all (b, "_,su64,su32,vp,as,ss64", group, MODE_vnum);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++SHAPE (store_za)
++
++/* void svfoo_t0(uint32_t, void *)
++   void svfoo_vnum_t0(uint32_t, void *, int64_t)
++
++   where the first argument is a variable ZA slice.  */
++struct str_za_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "_,su32,as", group, MODE_none);
++    build_all (b, "_,su32,as,ss64", group, MODE_vnum);
++  }
++};
++SHAPE (str_za)
++
+ /* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t).  */
+ struct tbl_tuple_def : public overloaded_base<0>
+ {
+@@ -3487,4 +3723,72 @@ struct unary_widen_def : public overloaded_base<0>
+ };
+ SHAPE (unary_widen)
+ 
++/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv<t1>_t)
++
++   where the first argument is a ZA tile.  */
++struct unary_za_m_def : public overloaded_base<1>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,vp,vp,t1", group, MODE_none);
++  }
++
++  tree
++  resolve (function_resolver &r) const override
++  {
++    type_suffix_index type;
++    if (!r.check_num_arguments (4)
++	|| !r.require_integer_immediate (0)
++	|| !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
++	|| !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
++	|| (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES)
++      return error_mark_node;
++
++    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++SHAPE (unary_za_m)
++
++/* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv<t1>_t)
++
++   where the first two fields form a (ZA tile, slice) pair.  */
++struct write_za_m_def : public overloaded_base<1>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,su32,vp,t1", group, MODE_none);
++  }
++
++  tree
++  resolve (function_resolver &r) const override
++  {
++    type_suffix_index type;
++    if (!r.check_num_arguments (4)
++	|| !r.require_integer_immediate (0)
++	|| !r.require_scalar_type (1, "uint32_t")
++	|| !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
++	|| (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES)
++      return error_mark_node;
++
++    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++SHAPE (write_za_m)
++
+ }
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+index 2b06152d4..9c1f44bdc 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+@@ -93,6 +93,10 @@ namespace aarch64_sve
+     extern const function_shape *const binary_uint64_opt_n;
+     extern const function_shape *const binary_wide;
+     extern const function_shape *const binary_wide_opt_n;
++    extern const function_shape *const binary_za_int_m;
++    extern const function_shape *const binary_za_m;
++    extern const function_shape *const binary_za_uint_m;
++    extern const function_shape *const bool_inherent;
+     extern const function_shape *const clast;
+     extern const function_shape *const compare;
+     extern const function_shape *const compare_opt_n;
+@@ -114,6 +118,9 @@ namespace aarch64_sve
+     extern const function_shape *const inc_dec_pred_scalar;
+     extern const function_shape *const inherent;
+     extern const function_shape *const inherent_b;
++    extern const function_shape *const inherent_za;
++    extern const function_shape *const inherent_mask_za;
++    extern const function_shape *const ldr_za;
+     extern const function_shape *const load;
+     extern const function_shape *const load_ext;
+     extern const function_shape *const load_ext_gather_index;
+@@ -124,6 +131,7 @@ namespace aarch64_sve
+     extern const function_shape *const load_gather_sv_restricted;
+     extern const function_shape *const load_gather_vs;
+     extern const function_shape *const load_replicate;
++    extern const function_shape *const load_za;
+     extern const function_shape *const mmla;
+     extern const function_shape *const pattern_pred;
+     extern const function_shape *const prefetch;
+@@ -131,6 +139,7 @@ namespace aarch64_sve
+     extern const function_shape *const prefetch_gather_offset;
+     extern const function_shape *const ptest;
+     extern const function_shape *const rdffr;
++    extern const function_shape *const read_za_m;
+     extern const function_shape *const reduction;
+     extern const function_shape *const reduction_wide;
+     extern const function_shape *const reinterpret;
+@@ -148,6 +157,8 @@ namespace aarch64_sve
+     extern const function_shape *const store_scatter_index_restricted;
+     extern const function_shape *const store_scatter_offset;
+     extern const function_shape *const store_scatter_offset_restricted;
++    extern const function_shape *const store_za;
++    extern const function_shape *const str_za;
+     extern const function_shape *const tbl_tuple;
+     extern const function_shape *const ternary_bfloat;
+     extern const function_shape *const ternary_bfloat_lane;
+@@ -186,6 +197,8 @@ namespace aarch64_sve
+     extern const function_shape *const unary_to_uint;
+     extern const function_shape *const unary_uint;
+     extern const function_shape *const unary_widen;
++    extern const function_shape *const unary_za_m;
++    extern const function_shape *const write_za_m;
+   }
+ }
+ 
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+new file mode 100644
+index 000000000..e1df6ce0d
+--- /dev/null
++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+@@ -0,0 +1,412 @@
++/* ACLE support for AArch64 SME.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but
++   WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   General Public License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "tree.h"
++#include "rtl.h"
++#include "tm_p.h"
++#include "memmodel.h"
++#include "insn-codes.h"
++#include "optabs.h"
++#include "recog.h"
++#include "expr.h"
++#include "basic-block.h"
++#include "function.h"
++#include "fold-const.h"
++#include "gimple.h"
++#include "gimple-iterator.h"
++#include "gimplify.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "aarch64-sve-builtins.h"
++#include "aarch64-sve-builtins-shapes.h"
++#include "aarch64-sve-builtins-base.h"
++#include "aarch64-sve-builtins-sme.h"
++#include "aarch64-sve-builtins-functions.h"
++
++using namespace aarch64_sve;
++
++namespace {
++
++class load_store_za_base : public function_base
++{
++public:
++  tree
++  memory_scalar_type (const function_instance &) const override
++  {
++    return void_type_node;
++  }
++};
++
++class read_write_za_base : public function_base
++{
++public:
++  constexpr read_write_za_base (int unspec) : m_unspec (unspec) {}
++
++  rtx
++  expand (function_expander &e) const override
++  {
++    auto za_mode = e.vector_mode (0);
++    auto z_mode = e.vector_mode (1);
++    auto icode = (za_mode == VNx1TImode
++		  ?
code_for_aarch64_sme (m_unspec, za_mode, z_mode) ++ : code_for_aarch64_sme (m_unspec, z_mode, z_mode)); ++ return e.use_exact_insn (icode); ++ } ++ ++ int m_unspec; ++}; ++ ++using load_za_base = add_call_properties<load_store_za_base, ++ CP_READ_MEMORY | CP_READ_ZA ++ | CP_WRITE_ZA>; ++ ++using store_za_base = add_call_properties<load_store_za_base, ++ CP_WRITE_MEMORY | CP_READ_ZA>; ++ ++/* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE. ++ The intrinsic has a vnum parameter at index ARGNO. Return true if the ++ vnum argument is a constant that is a valid ZA offset for the underlying ++ instruction. */ ++ ++static bool ++has_in_range_vnum_arg (function_expander &e, machine_mode mem_mode, ++ unsigned int argno) ++{ ++ return (e.mode_suffix_id == MODE_vnum ++ && CONST_INT_P (e.args[argno]) ++ && UINTVAL (e.args[argno]) < 16 / GET_MODE_UNIT_SIZE (mem_mode)); ++} ++ ++/* E is a ZA load or store intrinsic that uses instruction ICODE. Add a ++ 32-bit operand that gives the total ZA slice. (The instruction hard-codes ++ the constant offset to 0, so there is no operand for that.) ++ ++ Argument ARGNO is the intrinsic's slice argument. If the intrinsic is ++ a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand, ++ which must be added to the slice argument. */ ++ ++static void ++add_load_store_slice_operand (function_expander &e, insn_code icode, ++ unsigned int argno, unsigned int vnum_argno) ++{ ++ rtx base = e.args[argno]; ++ if (e.mode_suffix_id == MODE_vnum) ++ { ++ rtx vnum = lowpart_subreg (SImode, e.args[vnum_argno], DImode); ++ base = simplify_gen_binary (PLUS, SImode, base, vnum); ++ } ++ e.add_input_operand (icode, base); ++} ++ ++/* Add a memory operand for ZA LD1 or ST1 intrinsic E. BASE_ARGNO is ++ the index of the base argument. */ ++ ++static void ++add_load_store_operand (function_expander &e, unsigned int base_argno) ++{ ++ auto mode = e.vector_mode (0); ++ rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1, ++ AARCH64_FL_SM_ON); ++ auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base)); ++ set_mem_align (mem, BITS_PER_UNIT); ++ e.add_fixed_operand (mem); ++} ++ ++/* Expand ZA LDR or STR intrinsic E. There are two underlying instructions: ++ ++ - BASE_CODE has a zero ZA slice offset ++ - VNUM_CODE has a constant operand for the ZA slice offset. */ ++ ++static rtx ++expand_ldr_str_za (function_expander &e, insn_code base_code, ++ insn_code vnum_code) ++{ ++ if (has_in_range_vnum_arg (e, VNx16QImode, 2)) ++ { ++ rtx mem_offset = aarch64_sme_vq_immediate (Pmode, ++ UINTVAL (e.args[2]) * 16, ++ AARCH64_ISA_MODE); ++ e.add_input_operand (vnum_code, e.args[0]); ++ e.add_input_operand (vnum_code, e.args[2]); ++ e.add_input_operand (vnum_code, e.args[1]); ++ e.add_input_operand (vnum_code, mem_offset); ++ return e.generate_insn (vnum_code); ++ } ++ else ++ { ++ rtx base = e.get_contiguous_base (VNx16QImode, 1, 2, AARCH64_FL_SM_ON); ++ add_load_store_slice_operand (e, base_code, 0, 2); ++ e.add_input_operand (base_code, base); ++ return e.generate_insn (base_code); ++ } ++} ++ ++/* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec. ++ IS_LOAD is true if E is a load, false if it is a store. */ ++ ++static rtx ++expand_ld1_st1 (function_expander &e, int unspec, bool is_load) ++{ ++ bool is_vnum = has_in_range_vnum_arg (e, e.vector_mode (0), 4); ++ auto icode = (is_vnum ++ ? 
code_for_aarch64_sme_plus (unspec, e.vector_mode (0)) ++ : code_for_aarch64_sme (unspec, e.vector_mode (0))); ++ if (!is_load) ++ add_load_store_operand (e, 3); ++ e.add_input_operand (icode, e.args[0]); ++ if (is_vnum) ++ { ++ e.add_input_operand (icode, e.args[1]); ++ e.add_input_operand (icode, e.args[4]); ++ } ++ else ++ add_load_store_slice_operand (e, icode, 1, 4); ++ e.add_input_operand (icode, e.args[2]); ++ if (is_load) ++ add_load_store_operand (e, 3); ++ return e.generate_insn (icode); ++} ++ ++class arm_has_sme_impl : public function_base ++{ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_SME) ++ return f.fold_to_cstu (1); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ if (TARGET_SME) ++ return const1_rtx; ++ emit_insn (gen_aarch64_get_sme_state ()); ++ return expand_simple_binop (DImode, LSHIFTRT, ++ gen_rtx_REG (DImode, R0_REGNUM), ++ gen_int_mode (63, QImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++}; ++ ++class arm_in_streaming_mode_impl : public function_base ++{ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_STREAMING) ++ return f.fold_to_cstu (1); ++ if (TARGET_NON_STREAMING) ++ return f.fold_to_cstu (0); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ if (TARGET_STREAMING) ++ return const1_rtx; ++ ++ if (TARGET_NON_STREAMING) ++ return const0_rtx; ++ ++ rtx reg; ++ if (TARGET_SME) ++ { ++ reg = gen_reg_rtx (DImode); ++ emit_insn (gen_aarch64_read_svcr (reg)); ++ } ++ else ++ { ++ emit_insn (gen_aarch64_get_sme_state ()); ++ reg = gen_rtx_REG (DImode, R0_REGNUM); ++ } ++ return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++}; ++ ++/* Implements svcnts[bhwd]. */ ++class svcnts_bhwd_impl : public function_base ++{ ++public: ++ constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} ++ ++ unsigned int ++ get_shift () const ++ { ++ return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode)); ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_STREAMING) ++ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE); ++ auto shift = get_shift (); ++ if (!shift) ++ return cntsb; ++ ++ return expand_simple_binop (DImode, LSHIFTRT, cntsb, ++ gen_int_mode (shift, QImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++ ++ /* The mode of the vector associated with the [bhwd] suffix. 
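In streaming functions the fold above makes these counts compile-time constants; otherwise they expand to the RDSVL-based sequence, with the [bhwd] suffix selecting the final shift. A sketch of the invariant that the shifts implement (hypothetical function name):

    #include <arm_sme.h>

    // svcntsb/h/w/d all measure the streaming vector length, in bytes,
    // halfwords, words and doublewords respectively.
    uint64_t
    svl_in_bits (void)
    {
      uint64_t bytes = svcntsb ();
      // bytes == 2 * svcntsh () == 4 * svcntsw () == 8 * svcntsd ()
      return bytes * 8;
    }
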
*/ ++ machine_mode m_ref_mode; ++}; ++ ++class svld1_za_impl : public load_za_base ++{ ++public: ++ constexpr svld1_za_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ld1_st1 (e, m_unspec, true); ++ } ++ ++ int m_unspec; ++}; ++ ++class svldr_za_impl : public load_za_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_ldr0, ++ code_for_aarch64_sme_ldrn (Pmode)); ++ } ++}; ++ ++using svread_za_tile_impl = add_call_properties<read_write_za_base, ++ CP_READ_ZA>; ++ ++class svst1_za_impl : public store_za_base ++{ ++public: ++ constexpr svst1_za_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ld1_st1 (e, m_unspec, false); ++ } ++ ++ int m_unspec; ++}; ++ ++class svstr_za_impl : public store_za_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_str0, ++ code_for_aarch64_sme_strn (Pmode)); ++ } ++}; ++ ++class svundef_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &) const override ++ { ++ rtx target = gen_rtx_REG (VNx16QImode, ZA_REGNUM); ++ emit_clobber (copy_rtx (target)); ++ return const0_rtx; ++ } ++}; ++ ++using svwrite_za_tile_impl = add_call_properties<read_write_za_base, ++ CP_READ_ZA | CP_WRITE_ZA>; ++ ++class svzero_mask_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return e.use_exact_insn (CODE_FOR_aarch64_sme_zero_za); ++ } ++}; ++ ++class svzero_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &) const override ++ { ++ emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode))); ++ return const0_rtx; ++ } ++}; ++ ++} /* end anonymous namespace */ ++ ++namespace aarch64_sve { ++ ++FUNCTION (arm_has_sme, arm_has_sme_impl, ) ++FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, ) ++FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA, ++ UNSPEC_SME_ADDHA, -1)) ++FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA, ++ UNSPEC_SME_ADDVA, -1)) ++FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode)) ++FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode)) ++FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode)) ++FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode)) ++FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR)) ++FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER)) ++FUNCTION (svldr_za, svldr_za_impl, ) ++FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA, ++ UNSPEC_SME_FMOPA)) ++FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS, ++ UNSPEC_SME_FMOPS)) ++FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR)) ++FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER)) ++FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR)) ++FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER)) ++FUNCTION (svstr_za, svstr_za_impl, ) ++FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1)) ++FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1)) ++FUNCTION (svundef_za, svundef_za_impl, ) ++FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1)) ++FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1)) ++FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, 
(UNSPEC_SME_WRITE_HOR)) ++FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER)) ++FUNCTION (svzero_mask_za, svzero_mask_za_impl, ) ++FUNCTION (svzero_za, svzero_za_impl, ) ++ ++} /* end namespace aarch64_sve */ +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def +new file mode 100644 +index 000000000..5bdcc93f4 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def +@@ -0,0 +1,76 @@ ++/* ACLE support for AArch64 SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#define REQUIRED_EXTENSIONS 0 ++DEF_SVE_FUNCTION (arm_has_sme, bool_inherent, none, none) ++DEF_SVE_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_SME ++DEF_SVE_FUNCTION (svcntsb, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsd, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsh, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsw, count_inherent, none, none) ++DEF_SME_ZA_FUNCTION (svldr, ldr_za, za, none) ++DEF_SME_ZA_FUNCTION (svstr, str_za, za, none) ++DEF_SME_ZA_FUNCTION (svundef, inherent_za, za, none) ++DEF_SME_ZA_FUNCTION (svzero, inherent_za, za, none) ++DEF_SME_ZA_FUNCTION (svzero_mask, inherent_mask_za, za, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SM_ON ++DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_s_integer, za_m) ++DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_s_integer, za_m) ++DEF_SME_ZA_FUNCTION (svld1_hor, load_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svld1_ver, load_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_base, za_m) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, d_za, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_base, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, d_za, za_m) ++DEF_SME_ZA_FUNCTION (svread_hor, read_za_m, za_all_data, m) ++DEF_SME_ZA_FUNCTION (svread_ver, read_za_m, za_all_data, m) ++DEF_SME_ZA_FUNCTION (svst1_hor, store_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svst1_ver, store_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_base_signed, za_m) ++DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_base_signed, za_m) ++DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_base_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_base_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svwrite_hor, write_za_m, za_all_data, za_m) ++DEF_SME_ZA_FUNCTION (svwrite_ver, write_za_m, za_all_data, za_m) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ ++ | AARCH64_FL_SME_I16I64 \ ++ | AARCH64_FL_SM_ON) ++DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_d_integer, za_m) ++DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_d_integer, za_m) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_i16i64, za_m) 
++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_i16i64, za_m) ++DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_i16i64_signed, za_m) ++DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_i16i64_signed, za_m) ++DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_i16i64_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ ++ | AARCH64_FL_SME_F64F64 \ ++ | AARCH64_FL_SM_ON) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m) ++#undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h +new file mode 100644 +index 000000000..acfed7700 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h +@@ -0,0 +1,57 @@ ++/* ACLE support for AArch64 SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_SME_H ++#define GCC_AARCH64_SVE_BUILTINS_SME_H ++ ++namespace aarch64_sve ++{ ++ namespace functions ++ { ++ extern const function_base *const arm_has_sme; ++ extern const function_base *const arm_in_streaming_mode; ++ extern const function_base *const svaddha_za; ++ extern const function_base *const svaddva_za; ++ extern const function_base *const svcntsb; ++ extern const function_base *const svcntsd; ++ extern const function_base *const svcntsh; ++ extern const function_base *const svcntsw; ++ extern const function_base *const svld1_hor_za; ++ extern const function_base *const svld1_ver_za; ++ extern const function_base *const svldr_za; ++ extern const function_base *const svmopa_za; ++ extern const function_base *const svmops_za; ++ extern const function_base *const svread_hor_za; ++ extern const function_base *const svread_ver_za; ++ extern const function_base *const svst1_hor_za; ++ extern const function_base *const svst1_ver_za; ++ extern const function_base *const svstr_za; ++ extern const function_base *const svsumopa_za; ++ extern const function_base *const svsumops_za; ++ extern const function_base *const svusmopa_za; ++ extern const function_base *const svusmops_za; ++ extern const function_base *const svwrite_hor_za; ++ extern const function_base *const svwrite_ver_za; ++ extern const function_base *const svundef_za; ++ extern const function_base *const svzero_za; ++ extern const function_base *const svzero_mask_za; ++ } ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 3441b4294..32971a7c8 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -51,6 +51,7 @@ + #include "aarch64-sve-builtins.h" + #include "aarch64-sve-builtins-base.h" + #include "aarch64-sve-builtins-sve2.h" ++#include 
"aarch64-sve-builtins-sme.h" + #include "aarch64-sve-builtins-shapes.h" + + namespace aarch64_sve { +@@ -112,6 +113,7 @@ static const char *const pred_suffixes[NUM_PREDS + 1] = { + "_m", + "_x", + "_z", ++ "_m", + "" + }; + +@@ -136,12 +138,28 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { + TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_float, \ ++ TYPE_##CLASS != TYPE_bool, \ + TYPE_##CLASS == TYPE_bool, \ ++ false, \ ++ 0, \ ++ MODE }, ++#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ ++ { "_" #NAME, \ ++ NUM_VECTOR_TYPES, \ ++ NUM_TYPE_CLASSES, \ ++ BITS, \ ++ BITS / BITS_PER_UNIT, \ ++ false, \ ++ false, \ ++ false, \ ++ false, \ ++ false, \ ++ true, \ + 0, \ + MODE }, + #include "aarch64-sve-builtins.def" + { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, +- 0, VOIDmode } ++ false, false, 0, VOIDmode } + }; + + CONSTEXPR const group_suffix_info group_suffixes[] = { +@@ -422,6 +440,79 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { + TYPES_while1 (D, b32), \ + TYPES_while1 (D, b64) + ++/* _za8 _za16 _za32 _za64 _za128. */ ++#define TYPES_all_za(S, D) \ ++ S (za8), S (za16), S (za32), S (za64), S (za128) ++ ++/* _za64. */ ++#define TYPES_d_za(S, D) \ ++ S (za64) ++ ++/* { _za8 } x { _s8 _u8 } ++ ++ { _za16 } x { _bf16 _f16 _s16 _u16 } ++ ++ { _za32 } x { _f32 _s32 _u32 } ++ ++ { _za64 } x { _f64 _s64 _u64 }. */ ++#define TYPES_za_bhsd_data(S, D) \ ++ D (za8, s8), D (za8, u8), \ ++ D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \ ++ D (za32, f32), D (za32, s32), D (za32, u32), \ ++ D (za64, f64), D (za64, s64), D (za64, u64) ++ ++/* Likewise, plus: ++ ++ { _za128 } x { _bf16 } ++ { _f16 _f32 _f64 } ++ { _s8 _s16 _s32 _s64 } ++ { _u8 _u16 _u32 _u64 }. */ ++ ++#define TYPES_za_all_data(S, D) \ ++ TYPES_za_bhsd_data (S, D), \ ++ TYPES_reinterpret1 (D, za128) ++ ++/* _za32 x { _s32 _u32 }. */ ++#define TYPES_za_s_integer(S, D) \ ++ D (za32, s32), D (za32, u32) ++ ++ ++/* _za64_f64. */ ++#define TYPES_za_d_float(S, D) \ ++ D (za64, f64) ++ ++/* _za64 x { _s64 _u64 }. */ ++#define TYPES_za_d_integer(S, D) \ ++ D (za64, s64), D (za64, u64) ++ ++/* _za32 x { _s8 _u8 _bf16 _f16 _f32 }. */ ++#define TYPES_mop_base(S, D) \ ++ D (za32, s8), D (za32, u8), D (za32, bf16), D (za32, f16), D (za32, f32) ++ ++/* _za32_s8. */ ++#define TYPES_mop_base_signed(S, D) \ ++ D (za32, s8) ++ ++/* _za32_u8. */ ++#define TYPES_mop_base_unsigned(S, D) \ ++ D (za32, u8) ++ ++/* _za64 x { _s16 _u16 }. */ ++#define TYPES_mop_i16i64(S, D) \ ++ D (za64, s16), D (za64, u16) ++ ++/* _za64_s16. */ ++#define TYPES_mop_i16i64_signed(S, D) \ ++ D (za64, s16) ++ ++/* _za64_u16. */ ++#define TYPES_mop_i16i64_unsigned(S, D) \ ++ D (za64, u16) ++ ++/* _za. */ ++#define TYPES_za(S, D) \ ++ S (za) ++ + /* Describe a pair of type suffixes in which only the first is used. 
*/ + #define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } + +@@ -489,6 +580,19 @@ DEF_SVE_TYPES_ARRAY (cvt_narrow); + DEF_SVE_TYPES_ARRAY (inc_dec_n); + DEF_SVE_TYPES_ARRAY (reinterpret); + DEF_SVE_TYPES_ARRAY (while); ++DEF_SVE_TYPES_ARRAY (all_za); ++DEF_SVE_TYPES_ARRAY (d_za); ++DEF_SVE_TYPES_ARRAY (za_all_data); ++DEF_SVE_TYPES_ARRAY (za_s_integer); ++DEF_SVE_TYPES_ARRAY (za_d_float); ++DEF_SVE_TYPES_ARRAY (za_d_integer); ++DEF_SVE_TYPES_ARRAY (mop_base); ++DEF_SVE_TYPES_ARRAY (mop_base_signed); ++DEF_SVE_TYPES_ARRAY (mop_base_unsigned); ++DEF_SVE_TYPES_ARRAY (mop_i16i64); ++DEF_SVE_TYPES_ARRAY (mop_i16i64_signed); ++DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned); ++DEF_SVE_TYPES_ARRAY (za); + + static const group_suffix_index groups_none[] = { + GROUP_none, NUM_GROUP_SUFFIXES +@@ -505,6 +609,9 @@ static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; + explicit suffix. */ + static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS }; + ++/* Used by functions that only support "_m" predication. */ ++static const predication_index preds_m[] = { PRED_m, NUM_PREDS }; ++ + /* Used by functions that allow merging and "don't care" predication, + but are not suitable for predicated MOVPRFX. */ + static const predication_index preds_mx[] = { +@@ -536,17 +643,23 @@ static const predication_index preds_z_or_none[] = { + /* Used by (mostly predicate) functions that only support "_z" predication. */ + static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; + ++/* Used by SME instructions that always merge into ZA. */ ++static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS }; ++ + /* A list of all SVE ACLE functions. */ + static CONSTEXPR const function_group_info function_groups[] = { + #define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ + preds_##PREDS, REQUIRED_EXTENSIONS }, ++#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ ++ { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ ++ groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) }, + #include "aarch64-sve-builtins.def" + }; + + /* The scalar type associated with each vector type. */ +-extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES]; +-tree scalar_types[NUM_VECTOR_TYPES]; ++extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES + 1]; ++tree scalar_types[NUM_VECTOR_TYPES + 1]; + + /* The single-predicate and single-vector types, with their built-in + "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always +@@ -654,7 +767,7 @@ find_type_suffix_for_scalar_type (const_tree type) + /* A linear search should be OK here, since the code isn't hot and + the number of types is only small. 
*/ + for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) +- if (!type_suffixes[suffix_i].bool_p) ++ if (type_suffixes[suffix_i].vector_p) + { + vector_type_index vector_i = type_suffixes[suffix_i].vector_type; + if (matches_type_p (scalar_types[vector_i], type)) +@@ -745,6 +858,20 @@ check_required_extensions (location_t location, tree fndecl, + return false; + } + ++ if (missing_extensions & AARCH64_FL_SM_ON) ++ { ++ error_at (location, "ACLE function %qD can only be called when" ++ " SME streaming mode is enabled", fndecl); ++ return false; ++ } ++ ++ if (missing_extensions & AARCH64_FL_ZA_ON) ++ { ++ error_at (location, "ACLE function %qD can only be called from" ++ " a function that has %qs state", fndecl, "za"); ++ return false; ++ } ++ + static const struct { + aarch64_feature_flags flag; + const char *name; +@@ -780,9 +907,13 @@ report_out_of_range (location_t location, tree fndecl, unsigned int argno, + HOST_WIDE_INT actual, HOST_WIDE_INT min, + HOST_WIDE_INT max) + { +- error_at (location, "passing %wd to argument %d of %qE, which expects" +- " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, +- min, max); ++ if (min == max) ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " the value %wd", actual, argno + 1, fndecl, min); ++ else ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, ++ min, max); + } + + /* Report that LOCATION has a call to FNDECL in which argument ARGNO has +@@ -869,7 +1000,7 @@ function_instance::reads_global_state_p () const + return true; + + /* Handle direct reads of global state. */ +- return flags & (CP_READ_MEMORY | CP_READ_FFR); ++ return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA); + } + + /* Return true if calls to the function could modify some form of +@@ -890,7 +1021,7 @@ function_instance::modifies_global_state_p () const + return true; + + /* Handle direct modifications of global state. */ +- return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR); ++ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA); + } + + /* Return true if calls to the function could raise a signal. */ +@@ -922,8 +1053,8 @@ registered_function_hasher::equal (value_type value, const compare_type &key) + return value->instance == key; + } + +-sve_switcher::sve_switcher () +- : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE) ++sve_switcher::sve_switcher (aarch64_feature_flags flags) ++ : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags) + { + /* Changing the ISA flags and have_regs_of_mode should be enough here. + We shouldn't need to pay the compile-time cost of a full target +@@ -979,6 +1110,10 @@ char * + function_builder::get_name (const function_instance &instance, + bool overloaded_p) + { ++ /* __arm_* functions are listed as arm_*, so that the associated GCC ++ code is not in the implementation namespace. */ ++ if (strncmp (instance.base_name, "arm_", 4) == 0) ++ append_name ("__"); + append_name (instance.base_name); + if (overloaded_p) + switch (instance.displacement_units ()) +@@ -1016,12 +1151,72 @@ add_attribute (const char *name, tree attrs) + return tree_cons (get_identifier (name), NULL_TREE, attrs); + } + +-/* Return the appropriate function attributes for INSTANCE. */ ++/* Add attribute NS::NAME to ATTRS. 
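Because of the prefixing above, the base names arm_has_sme and arm_in_streaming_mode surface to users as __arm_has_sme and __arm_in_streaming_mode. Both fold to constants whenever the target information makes the answer known at compile time, as the fold hooks earlier in the patch show. A sketch (hypothetical function name):

    #include <arm_sme.h>

    // Streaming-compatible queries, callable from any function.
    int
    use_sme_path (void)
    {
      return __arm_has_sme () && !__arm_in_streaming_mode ();
    }
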
*/ ++static tree ++add_attribute (const char *ns, const char *name, tree value, tree attrs) ++{ ++ return tree_cons (build_tree_list (get_identifier (ns), ++ get_identifier (name)), ++ value, attrs); ++} ++ ++/* Attribute arm::NAME describes shared state that is an input if IS_IN ++ and an output if IS_OUT. Check whether a call with call properties ++ CALL_FLAGS needs such an attribute. Add it to in-progress attribute ++ list ATTRS if so. Return the new attribute list. */ ++static tree ++add_shared_state_attribute (const char *name, bool is_in, bool is_out, ++ unsigned int call_flags, tree attrs) ++{ ++ struct state_flag_info ++ { ++ const char *name; ++ unsigned int read_flag; ++ unsigned int write_flag; ++ }; ++ static state_flag_info state_flags[] = ++ { ++ { "za", CP_READ_ZA, CP_WRITE_ZA } ++ }; ++ ++ tree args = NULL_TREE; ++ for (const auto &state_flag : state_flags) ++ { ++ auto all_flags = state_flag.read_flag | state_flag.write_flag; ++ auto these_flags = ((is_in ? state_flag.read_flag : 0) ++ | (is_out ? state_flag.write_flag : 0)); ++ if ((call_flags & all_flags) == these_flags) ++ { ++ tree value = build_string (strlen (state_flag.name) + 1, ++ state_flag.name); ++ args = tree_cons (NULL_TREE, value, args); ++ } ++ } ++ if (args) ++ attrs = add_attribute ("arm", name, args, attrs); ++ return attrs; ++} ++ ++/* Return the appropriate function attributes for INSTANCE, which requires ++ the feature flags in REQUIRED_EXTENSIONS. */ + tree +-function_builder::get_attributes (const function_instance &instance) ++function_builder::get_attributes (const function_instance &instance, ++ aarch64_feature_flags required_extensions) + { + tree attrs = NULL_TREE; + ++ if (required_extensions & AARCH64_FL_SM_ON) ++ attrs = add_attribute ("arm", "streaming", NULL_TREE, attrs); ++ else if (!(required_extensions & AARCH64_FL_SM_OFF)) ++ attrs = add_attribute ("arm", "streaming_compatible", NULL_TREE, attrs); ++ ++ attrs = add_shared_state_attribute ("in", true, false, ++ instance.call_properties (), attrs); ++ attrs = add_shared_state_attribute ("out", false, true, ++ instance.call_properties (), attrs); ++ attrs = add_shared_state_attribute ("inout", true, true, ++ instance.call_properties (), attrs); ++ + if (!instance.modifies_global_state_p ()) + { + if (instance.reads_global_state_p ()) +@@ -1097,7 +1292,7 @@ add_unique_function (const function_instance &instance, + tree fntype = build_function_type_array (return_type, + argument_types.length (), + argument_types.address ()); +- tree attrs = get_attributes (instance); ++ tree attrs = get_attributes (instance, required_extensions); + registered_function &rfn = add_function (instance, name, fntype, attrs, + required_extensions, false, false); + +@@ -1114,7 +1309,7 @@ add_unique_function (const function_instance &instance, + if (strcmp (name, overload_name) != 0) + { + /* Attribute lists shouldn't be shared. 
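The net effect of get_attributes is easiest to see on the declarations the compiler registers. A sketch of two representative prototypes, written with the user-facing keyword spellings; these illustrate compiler-provided declarations, not code to write by hand:

    // svldr reads memory and both reads and writes ZA, and is not tied
    // to either streaming state:
    void svldr_za (uint32_t slice, const void *ptr)
      __arm_streaming_compatible __arm_inout ("za");

    // svld1_hor additionally requires streaming mode (AARCH64_FL_SM_ON):
    void svld1_hor_za8 (uint64_t tile, uint32_t slice, svbool_t pg,
                        const void *ptr)
      __arm_streaming __arm_inout ("za");
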
*/ +- tree attrs = get_attributes (instance); ++ tree attrs = get_attributes (instance, required_extensions); + bool placeholder_p = !(m_direct_overloads || force_direct_overloads); + add_function (instance, overload_name, fntype, attrs, + required_extensions, false, placeholder_p); +@@ -2283,6 +2478,7 @@ bool + function_resolver::check_gp_argument (unsigned int nops, + unsigned int &i, unsigned int &nargs) + { ++ gcc_assert (pred != PRED_za_m); + i = 0; + if (pred != PRED_none) + { +@@ -2488,9 +2684,7 @@ function_checker::function_checker (location_t location, + unsigned int nargs, tree *args) + : function_call_info (location, instance, fndecl), + m_fntype (fntype), m_nargs (nargs), m_args (args), +- /* We don't have to worry about unary _m operations here, since they +- never have arguments that need checking. */ +- m_base_arg (pred != PRED_none ? 1 : 0) ++ m_base_arg (pred != PRED_none && pred != PRED_za_m ? 1 : 0) + { + } + +@@ -2889,21 +3083,51 @@ function_expander::convert_to_pmode (rtx x) + } + + /* Return the base address for a contiguous load or store function. +- MEM_MODE is the mode of the addressed memory. */ ++ MEM_MODE is the mode of the addressed memory, BASE_ARGNO is ++ the index of the base argument, and VNUM_ARGNO is the index of ++ the vnum offset argument (if any). VL_ISA_MODE is AARCH64_FL_SM_ON ++ if the vnum argument is a factor of the SME vector length, 0 if it ++ is a factor of the current prevailing vector length. */ + rtx +-function_expander::get_contiguous_base (machine_mode mem_mode) ++function_expander::get_contiguous_base (machine_mode mem_mode, ++ unsigned int base_argno, ++ unsigned int vnum_argno, ++ aarch64_feature_flags vl_isa_mode) + { +- rtx base = convert_to_pmode (args[1]); ++ rtx base = convert_to_pmode (args[base_argno]); + if (mode_suffix_id == MODE_vnum) + { +- /* Use the size of the memory mode for extending loads and truncating +- stores. Use the size of a full vector for non-extending loads +- and non-truncating stores (including svld[234] and svst[234]). */ +- poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), +- BYTES_PER_SVE_VECTOR); +- rtx offset = gen_int_mode (size, Pmode); +- offset = simplify_gen_binary (MULT, Pmode, args[2], offset); +- base = simplify_gen_binary (PLUS, Pmode, base, offset); ++ rtx vnum = args[vnum_argno]; ++ if (vnum != const0_rtx) ++ { ++ /* Use the size of the memory mode for extending loads and truncating ++ stores. Use the size of a full vector for non-extending loads ++ and non-truncating stores (including svld[234] and svst[234]). 
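In intrinsic terms, this scaling is what gives the ZA _vnum forms their meaning: the offset is measured in streaming vector lengths even when the calling function is not itself streaming, hence the AARCH64_FL_SM_ON path above. A sketch of the equivalence (hypothetical wrapper; attribute spellings assumed):

    // The two calls access the same memory and the same tile slice.
    void
    vnum_equiv (svbool_t pg, const void *ptr, uint32_t slice, int64_t vnum)
      __arm_streaming __arm_inout ("za")
    {
      svld1_hor_vnum_za8 (0, slice, pg, ptr, vnum);
      svld1_hor_za8 (0, slice + (uint32_t) vnum, pg,
                     (const char *) ptr + vnum * (int64_t) svcntsb ());
    }
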
*/ ++ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), ++ BYTES_PER_SVE_VECTOR); ++ rtx offset; ++ if ((vl_isa_mode & AARCH64_FL_SM_ON) ++ && !TARGET_STREAMING ++ && !size.is_constant ()) ++ { ++ gcc_assert (known_eq (size, BYTES_PER_SVE_VECTOR)); ++ if (CONST_INT_P (vnum) && IN_RANGE (INTVAL (vnum), -32, 31)) ++ offset = aarch64_sme_vq_immediate (Pmode, INTVAL (vnum) * 16, ++ AARCH64_ISA_MODE); ++ else ++ { ++ offset = aarch64_sme_vq_immediate (Pmode, 16, ++ AARCH64_ISA_MODE); ++ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); ++ } ++ } ++ else ++ { ++ offset = gen_int_mode (size, Pmode); ++ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); ++ } ++ base = simplify_gen_binary (PLUS, Pmode, base, offset); ++ } + } + return base; + } +@@ -2991,11 +3215,18 @@ function_expander::add_input_operand (insn_code icode, rtx x) + machine_mode mode = operand.mode; + if (mode == VOIDmode) + { +- /* The only allowable use of VOIDmode is the wildcard +- aarch64_any_register_operand, which is used to avoid +- combinatorial explosion in the reinterpret patterns. */ +- gcc_assert (operand.predicate == aarch64_any_register_operand); +- mode = GET_MODE (x); ++ /* The only allowable uses of VOIDmode are: ++ ++ - the wildcard aarch64_any_register_operand, which is used ++ to avoid combinatorial explosion in the reinterpret patterns ++ ++ - pmode_register_operand, which always has mode Pmode. */ ++ if (operand.predicate == aarch64_any_register_operand) ++ mode = GET_MODE (x); ++ else if (operand.predicate == pmode_register_operand) ++ mode = Pmode; ++ else ++ gcc_unreachable (); + } + else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) + x = expand_vector_broadcast (mode, x); +@@ -3010,7 +3241,7 @@ function_expander::add_input_operand (insn_code icode, rtx x) + + /* Add an integer operand with value X to the instruction. */ + void +-function_expander::add_integer_operand (HOST_WIDE_INT x) ++function_expander::add_integer_operand (poly_int64 x) + { + m_ops.safe_grow (m_ops.length () + 1, true); + create_integer_operand (&m_ops.last (), x); +@@ -3555,7 +3786,10 @@ init_builtins () + sve_switcher sve; + register_builtin_types (); + if (in_lto_p) +- handle_arm_sve_h (); ++ { ++ handle_arm_sve_h (); ++ handle_arm_sme_h (); ++ } + } + + /* Register vector type TYPE under its arm_sve.h name. */ +@@ -3705,7 +3939,8 @@ handle_arm_sve_h () + function_table = new hash_table<registered_function_hasher> (1023); + function_builder builder; + for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) +- builder.register_function_group (function_groups[i]); ++ if (!(function_groups[i].required_extensions & AARCH64_FL_SME)) ++ builder.register_function_group (function_groups[i]); + } + + /* Return the function decl with SVE function subcode CODE, or error_mark_node +@@ -3718,6 +3953,33 @@ builtin_decl (unsigned int code, bool) + return (*registered_functions)[code]->decl; + } + ++/* Implement #pragma GCC aarch64 "arm_sme.h". 
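In source terms the pragma is reached through the header itself, so a minimal translation unit needs nothing beyond the include; arm_sme.h pulls in arm_sve.h first (see the new header later in the patch), which is why the "defined without first defining" diagnostic below is hard to trigger from well-formed code. A sketch, assuming a command line along the lines of -march=armv9-a+sme:

    #include <arm_sme.h>

    uint64_t
    bytes_per_svl (void)
    {
      return svcntsb ();
    }
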
*/ ++void ++handle_arm_sme_h () ++{ ++ if (!function_table) ++ { ++ error ("%qs defined without first defining %qs", ++ "arm_sme.h", "arm_sve.h"); ++ return; ++ } ++ ++ static bool initialized_p; ++ if (initialized_p) ++ { ++ error ("duplicate definition of %qs", "arm_sme.h"); ++ return; ++ } ++ initialized_p = true; ++ ++ sme_switcher sme; ++ ++ function_builder builder; ++ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) ++ if (function_groups[i].required_extensions & AARCH64_FL_SME) ++ builder.register_function_group (function_groups[i]); ++} ++ + /* If we're implementing manual overloading, check whether the SVE + function with subcode CODE is overloaded, and if so attempt to + determine the corresponding non-overloaded function. The call +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def +index be10b5ea1..69c11b1d0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins.def +@@ -29,6 +29,10 @@ + #define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) + #endif + ++#ifndef DEF_SME_ZA_SUFFIX ++#define DEF_SME_ZA_SUFFIX(A, B, C) ++#endif ++ + #ifndef DEF_SVE_GROUP_SUFFIX + #define DEF_SVE_GROUP_SUFFIX(A, B, C) + #endif +@@ -42,6 +46,16 @@ + DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) + #endif + ++#ifndef DEF_SME_ZA_FUNCTION_GS ++#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) \ ++ DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) ++#endif ++ ++#ifndef DEF_SME_ZA_FUNCTION ++#define DEF_SME_ZA_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ ++ DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) ++#endif ++ + DEF_SVE_MODE (n, none, none, none) + DEF_SVE_MODE (index, none, none, elements) + DEF_SVE_MODE (offset, none, none, bytes) +@@ -104,16 +118,30 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) + DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) + DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) + ++/* Associate _za with bytes. This is needed for svldr_vnum_za and ++ svstr_vnum_za, whose ZA offset can be in the range [0, 15], as for za8. */ ++DEF_SME_ZA_SUFFIX (za, 8, VNx16QImode) ++ ++DEF_SME_ZA_SUFFIX (za8, 8, VNx16QImode) ++DEF_SME_ZA_SUFFIX (za16, 16, VNx8HImode) ++DEF_SME_ZA_SUFFIX (za32, 32, VNx4SImode) ++DEF_SME_ZA_SUFFIX (za64, 64, VNx2DImode) ++DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode) ++ + DEF_SVE_GROUP_SUFFIX (x2, 0, 2) + DEF_SVE_GROUP_SUFFIX (x3, 0, 3) + DEF_SVE_GROUP_SUFFIX (x4, 0, 4) + + #include "aarch64-sve-builtins-base.def" + #include "aarch64-sve-builtins-sve2.def" ++#include "aarch64-sve-builtins-sme.def" + ++#undef DEF_SME_ZA_FUNCTION + #undef DEF_SVE_FUNCTION ++#undef DEF_SME_ZA_FUNCTION_GS + #undef DEF_SVE_FUNCTION_GS + #undef DEF_SVE_GROUP_SUFFIX ++#undef DEF_SME_ZA_SUFFIX + #undef DEF_SVE_TYPE_SUFFIX + #undef DEF_SVE_TYPE + #undef DEF_SVE_MODE +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index f16ac3947..6ef6bb93f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -97,6 +97,8 @@ const unsigned int CP_PREFETCH_MEMORY = 1U << 3; + const unsigned int CP_WRITE_MEMORY = 1U << 4; + const unsigned int CP_READ_FFR = 1U << 5; + const unsigned int CP_WRITE_FFR = 1U << 6; ++const unsigned int CP_READ_ZA = 1U << 7; ++const unsigned int CP_WRITE_ZA = 1U << 8; + + /* Enumerates the SVE predicate and (data) vector types, together called + "vector types" for brevity. 
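The [0, 15] range mentioned above is the immediate range of a single LDR/STR (ZA array vector) instruction; larger vnum values simply go through the non-immediate path in expand_ldr_str_za. ZA holds svcntsb () vectors in total, so a full spill looks like this sketch (hypothetical function name; attribute spellings assumed):

    // Store all of ZA to a buffer of at least svcntsb () * svcntsb ()
    // bytes.
    void
    save_za (void *buf) __arm_streaming_compatible __arm_in ("za")
    {
      for (uint64_t i = 0; i < svcntsb (); ++i)
        svstr_vnum_za (0, buf, (int64_t) i);
    }
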
*/ +@@ -142,6 +144,10 @@ enum predication_index + /* Zero predication: set inactive lanes of the vector result to zero. */ + PRED_z, + ++ /* Merging predication for SME's ZA: merge into slices of the array ++ instead of overwriting the whole slices. */ ++ PRED_za_m, ++ + NUM_PREDS + }; + +@@ -176,6 +182,8 @@ enum type_suffix_index + { + #define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ + TYPE_SUFFIX_ ## NAME, ++#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ ++ TYPE_SUFFIX_ ## NAME, + #include "aarch64-sve-builtins.def" + NUM_TYPE_SUFFIXES + }; +@@ -240,9 +248,13 @@ struct type_suffix_info + unsigned int unsigned_p : 1; + /* True if the suffix is for a floating-point type. */ + unsigned int float_p : 1; ++ /* True if the suffix is for a vector type (integer or float). */ ++ unsigned int vector_p : 1; + /* True if the suffix is for a boolean type. */ + unsigned int bool_p : 1; +- unsigned int spare : 12; ++ /* True if the suffix is for SME's ZA. */ ++ unsigned int za_p : 1; ++ unsigned int spare : 10; + + /* The associated vector or predicate mode. */ + machine_mode vector_mode : 16; +@@ -356,13 +368,15 @@ public: + tree displacement_vector_type () const; + units_index displacement_units () const; + ++ unsigned int num_za_tiles () const; ++ + const type_suffix_info &type_suffix (unsigned int) const; + const group_suffix_info &group_suffix () const; + + tree scalar_type (unsigned int) const; + tree vector_type (unsigned int) const; + tree tuple_type (unsigned int) const; +- unsigned int elements_per_vq (unsigned int i) const; ++ unsigned int elements_per_vq (unsigned int) const; + machine_mode vector_mode (unsigned int) const; + machine_mode tuple_mode (unsigned int) const; + machine_mode gp_mode (unsigned int) const; +@@ -401,7 +415,7 @@ private: + + char *get_name (const function_instance &, bool); + +- tree get_attributes (const function_instance &); ++ tree get_attributes (const function_instance &, aarch64_feature_flags); + + registered_function &add_function (const function_instance &, + const char *, tree, tree, +@@ -605,7 +619,8 @@ public: + bool overlaps_input_p (rtx); + + rtx convert_to_pmode (rtx); +- rtx get_contiguous_base (machine_mode); ++ rtx get_contiguous_base (machine_mode, unsigned int = 1, unsigned int = 2, ++ aarch64_feature_flags = 0); + rtx get_fallback_value (machine_mode, unsigned int, + unsigned int, unsigned int &); + rtx get_reg_target (); +@@ -613,7 +628,7 @@ public: + + void add_output_operand (insn_code); + void add_input_operand (insn_code, rtx); +- void add_integer_operand (HOST_WIDE_INT); ++ void add_integer_operand (poly_int64); + void add_mem_operand (machine_mode, rtx); + void add_address_operand (rtx); + void add_fixed_operand (rtx); +@@ -738,7 +753,7 @@ public: + class sve_switcher : public aarch64_simd_switcher + { + public: +- sve_switcher (); ++ sve_switcher (aarch64_feature_flags = 0); + ~sve_switcher (); + + private: +@@ -746,11 +761,18 @@ private: + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; + }; + ++/* Extends sve_switch enough for defining arm_sme.h. 
*/ ++class sme_switcher : public sve_switcher ++{ ++public: ++ sme_switcher () : sve_switcher (AARCH64_FL_SME) {} ++}; ++ + extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; + extern const mode_suffix_info mode_suffixes[MODE_none + 1]; + extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES]; + +-extern tree scalar_types[NUM_VECTOR_TYPES]; ++extern tree scalar_types[NUM_VECTOR_TYPES + 1]; + extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; + extern tree acle_svpattern; + extern tree acle_svprfop; +@@ -883,6 +905,16 @@ function_instance::displacement_vector_type () const + return acle_vector_types[0][mode_suffix ().displacement_vector_type]; + } + ++/* Return the number of ZA tiles associated with the _za<N> suffix ++ (which is always the first type suffix). */ ++inline unsigned int ++function_instance::num_za_tiles () const ++{ ++ auto &suffix = type_suffix (0); ++ gcc_checking_assert (suffix.za_p); ++ return suffix.element_bytes; ++} ++ + /* If the function takes a vector or scalar displacement, return the units + in which the displacement is measured, otherwise return UNITS_none. */ + inline units_index +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 112dfeabb..113784e31 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -6160,15 +6160,26 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) + } + + /* Return true if a single RDVL instruction can multiply FACTOR by the +- number of 128-bit quadwords in an SVE vector. */ ++ number of 128-bit quadwords in an SVE vector. This is also the ++ range of ADDVL. */ + + static bool +-aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor) ++aarch64_sve_rdvl_addvl_factor_p (HOST_WIDE_INT factor) + { + return (multiple_p (factor, 16) + && IN_RANGE (factor, -32 * 16, 31 * 16)); + } + ++/* Return true if ADDPL can be used to add FACTOR multiplied by the number ++ of quadwords in an SVE vector. */ ++ ++static bool ++aarch64_sve_addpl_factor_p (HOST_WIDE_INT factor) ++{ ++ return (multiple_p (factor, 2) ++ && IN_RANGE (factor, -32 * 2, 31 * 2)); ++} ++ + /* Return true if we can move VALUE into a register using a single + RDVL instruction. */ + +@@ -6176,7 +6187,7 @@ static bool + aarch64_sve_rdvl_immediate_p (poly_int64 value) + { + HOST_WIDE_INT factor = value.coeffs[0]; +- return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor); ++ return value.coeffs[1] == factor && aarch64_sve_rdvl_addvl_factor_p (factor); + } + + /* Likewise for rtx X. */ +@@ -6212,10 +6223,8 @@ aarch64_sve_addvl_addpl_immediate_p (poly_int64 value) + HOST_WIDE_INT factor = value.coeffs[0]; + if (factor == 0 || value.coeffs[1] != factor) + return false; +- /* FACTOR counts VG / 2, so a value of 2 is one predicate width +- and a value of 16 is one vector width. */ +- return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16)) +- || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2))); ++ return (aarch64_sve_rdvl_addvl_factor_p (factor) ++ || aarch64_sve_addpl_factor_p (factor)); + } + + /* Likewise for rtx X. */ +@@ -6315,11 +6324,11 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + number of 128-bit quadwords in an SME vector. ISA_MODE is the + ISA mode in which the calculation is being performed. 
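Since element_bytes for _za8 through _za128 is 1, 2, 4, 8 and 16, num_za_tiles above matches the architectural tile counts (ZA0.B alone, through ZA0.Q-ZA15.Q), and the various require_immediate_range (0, 0, c.num_za_tiles () - 1) checks enforce exactly that range on tile operands. A sketch of the extremes (hypothetical function name; attribute spellings assumed):

    // za8 has a single tile, so only 0 is a valid tile index; za128 has
    // sixteen, so 0-15 are valid.
    void
    tile_indices (svint8_t def, svbool_t pg, uint32_t slice)
      __arm_streaming __arm_inout ("za")
    {
      svint8_t row = svread_hor_za8_s8_m (def, pg, 0, slice);
      svwrite_hor_za128_s8_m (15, slice, pg, row);
    }
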
*/ + +-static rtx ++rtx + aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor, + aarch64_feature_flags isa_mode) + { +- gcc_assert (aarch64_sve_rdvl_factor_p (factor)); ++ gcc_assert (aarch64_sve_rdvl_addvl_factor_p (factor)); + if (isa_mode & AARCH64_FL_SM_ON) + /* We're in streaming mode, so we can use normal poly-int values. */ + return gen_int_mode ({ factor, factor }, mode); +@@ -6362,7 +6371,7 @@ aarch64_rdsvl_immediate_p (const_rtx x) + { + HOST_WIDE_INT factor; + return (aarch64_sme_vq_unspec_p (x, &factor) +- && aarch64_sve_rdvl_factor_p (factor)); ++ && aarch64_sve_rdvl_addvl_factor_p (factor)); + } + + /* Return the asm string for an RDSVL instruction that calculates X, +@@ -6379,6 +6388,38 @@ aarch64_output_rdsvl (const_rtx x) + return buffer; + } + ++/* Return true if X is a constant that can be added using ADDSVL or ADDSPL. */ ++ ++bool ++aarch64_addsvl_addspl_immediate_p (const_rtx x) ++{ ++ HOST_WIDE_INT factor; ++ return (aarch64_sme_vq_unspec_p (x, &factor) ++ && (aarch64_sve_rdvl_addvl_factor_p (factor) ++ || aarch64_sve_addpl_factor_p (factor))); ++} ++ ++/* X is a constant that satisfies aarch64_addsvl_addspl_immediate_p. ++ Return the asm string for the associated instruction. */ ++ ++char * ++aarch64_output_addsvl_addspl (rtx x) ++{ ++ static char buffer[sizeof ("addspl\t%x0, %x1, #-") + 3 * sizeof (int)]; ++ HOST_WIDE_INT factor; ++ if (!aarch64_sme_vq_unspec_p (x, &factor)) ++ gcc_unreachable (); ++ if (aarch64_sve_rdvl_addvl_factor_p (factor)) ++ snprintf (buffer, sizeof (buffer), "addsvl\t%%x0, %%x1, #%d", ++ (int) factor / 16); ++ else if (aarch64_sve_addpl_factor_p (factor)) ++ snprintf (buffer, sizeof (buffer), "addspl\t%%x0, %%x1, #%d", ++ (int) factor / 2); ++ else ++ gcc_unreachable (); ++ return buffer; ++} ++ + /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ + + static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = +@@ -6965,7 +7006,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + { + /* Try to use an unshifted CNT[BHWD] or RDVL. */ + if (aarch64_sve_cnt_factor_p (factor) +- || aarch64_sve_rdvl_factor_p (factor)) ++ || aarch64_sve_rdvl_addvl_factor_p (factor)) + { + val = gen_int_mode (poly_int64 (factor, factor), mode); + shift = 0; +@@ -12185,7 +12226,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, + && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) + index = SUBREG_REG (index); + +- if (aarch64_sve_data_mode_p (mode)) ++ if (aarch64_sve_data_mode_p (mode) || mode == VNx1TImode) + { + if (type != ADDRESS_REG_REG + || (1 << shift) != GET_MODE_UNIT_SIZE (mode)) +@@ -12288,7 +12329,8 @@ aarch64_classify_address (struct aarch64_address_info *info, + && ((vec_flags == 0 + && known_lt (GET_MODE_SIZE (mode), 16)) + || vec_flags == VEC_ADVSIMD +- || vec_flags & VEC_SVE_DATA)); ++ || vec_flags & VEC_SVE_DATA ++ || mode == VNx1TImode)); + + /* For SVE, only accept [Rn], [Rn, #offset, MUL VL] and [Rn, Rm, LSL #shift]. + The latter is not valid for SVE predicates, and that's rejected through +@@ -12407,7 +12449,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + /* Make "m" use the LD1 offset range for SVE data modes, so + that pre-RTL optimizers like ivopts will work to that + instead of the wider LDR/STR range. */ +- if (vec_flags == VEC_SVE_DATA) ++ if (vec_flags == VEC_SVE_DATA || mode == VNx1TImode) + return (type == ADDR_QUERY_M + ? 
offset_4bit_signed_scaled_p (mode, offset) + : offset_9bit_signed_scaled_p (mode, offset)); +@@ -14750,6 +14792,51 @@ aarch64_output_casesi (rtx *operands) + return ""; + } + ++/* Return the asm string for an SME ZERO instruction whose 8-bit mask ++ operand is MASK. */ ++const char * ++aarch64_output_sme_zero_za (rtx mask) ++{ ++ auto mask_val = UINTVAL (mask); ++ if (mask_val == 0) ++ return "zero\t{}"; ++ ++ if (mask_val == 0xff) ++ return "zero\t{ za }"; ++ ++ static constexpr std::pair<unsigned int, char> tiles[] = { ++ { 0xff, 'b' }, ++ { 0x55, 'h' }, ++ { 0x11, 's' }, ++ { 0x01, 'd' } ++ }; ++ /* The last entry in the list has the form "za7.d }", but that's the ++ same length as "za7.d, ". */ ++ static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1]; ++ unsigned int i = 0; ++ i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t"); ++ const char *prefix = "{ "; ++ for (auto &tile : tiles) ++ { ++ auto tile_mask = tile.first; ++ unsigned int tile_index = 0; ++ while (tile_mask < 0x100) ++ { ++ if ((mask_val & tile_mask) == tile_mask) ++ { ++ i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c", ++ prefix, tile_index, tile.second); ++ prefix = ", "; ++ mask_val &= ~tile_mask; ++ } ++ tile_mask <<= 1; ++ tile_index += 1; ++ } ++ } ++ gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer)); ++ snprintf (buffer + i, sizeof (buffer) - i, " }"); ++ return buffer; ++} + + /* Return size in bits of an arithmetic operand which is shifted/scaled and + masked such that it is suitable for a UXTB, UXTH, or UXTW extend +@@ -23756,6 +23843,31 @@ aarch64_sve_struct_memory_operand_p (rtx op) + && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last)); + } + ++/* Return true if OFFSET is a constant integer and if VNUM is ++ OFFSET * the number of bytes in an SVE vector. This is the requirement ++ that exists in SME LDR and STR instructions, where the VL offset must ++ equal the ZA slice offset. */ ++bool ++aarch64_sme_ldr_vnum_offset_p (rtx offset, rtx vnum) ++{ ++ if (!CONST_INT_P (offset) || !IN_RANGE (INTVAL (offset), 0, 15)) ++ return false; ++ ++ if (TARGET_STREAMING) ++ { ++ poly_int64 const_vnum; ++ return (poly_int_rtx_p (vnum, &const_vnum) ++ && known_eq (const_vnum, ++ INTVAL (offset) * BYTES_PER_SVE_VECTOR)); ++ } ++ else ++ { ++ HOST_WIDE_INT factor; ++ return (aarch64_sme_vq_unspec_p (vnum, &factor) ++ && factor == INTVAL (offset) * 16); ++ } ++} ++ + /* Emit a register copy from operand to operand, taking care not to + early-clobber source registers in the process. + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 8b21faf34..50fdf2f50 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* Macros to test ISA flags. 
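The greedy walk above (the whole-ZA byte tile first, then halfword, word and doubleword tiles) determines the printed tile list. Some sample expansions, shown as a sketch with the intended output of aarch64_output_sme_zero_za in comments (hypothetical function name; attribute spellings assumed):

    void
    clear_tiles (void) __arm_streaming_compatible __arm_inout ("za")
    {
      svzero_mask_za (0);     // zero {}
      svzero_mask_za (0x55);  // zero { za0.h }
      svzero_mask_za (0x0f);  // zero { za0.d, za1.d, za2.d, za3.d }
      svzero_mask_za (0xff);  // zero { za }
    }
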
*/ + + #define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) ++#define AARCH64_ISA_SM_ON (aarch64_isa_flags & AARCH64_FL_SM_ON) + #define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON) + #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) + #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) +@@ -224,6 +225,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) + #define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) ++#define AARCH64_ISA_SME_I16I64 (aarch64_isa_flags & AARCH64_FL_SME_I16I64) ++#define AARCH64_ISA_SME_F64F64 (aarch64_isa_flags & AARCH64_FL_SME_F64F64) + #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) +@@ -256,6 +259,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* The current function is a normal non-streaming function. */ + #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) + ++/* The current function has a streaming body. */ ++#define TARGET_STREAMING (AARCH64_ISA_SM_ON) ++ + /* The current function has a streaming-compatible body. */ + #define TARGET_STREAMING_COMPATIBLE \ + ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) +@@ -316,6 +322,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + imply anything about the state of PSTATE.SM. */ + #define TARGET_SME (AARCH64_ISA_SME) + ++/* Streaming-mode SME instructions. */ ++#define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME) ++ ++/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */ ++#define TARGET_SME_I16I64 (AARCH64_ISA_SME_I16I64) ++ ++/* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64. */ ++#define TARGET_SME_F64F64 (AARCH64_ISA_SME_F64F64) ++ + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 05a7c6675..6b4341866 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -2147,10 +2147,10 @@ + + (define_insn "*add<mode>3_aarch64" + [(set +- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") ++ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk,rk") + (plus:GPI +- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") +- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] ++ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk,rk") ++ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav,UaV")))] + "" + "@ + add\\t%<w>0, %<w>1, %2 +@@ -2159,10 +2159,11 @@ + sub\\t%<w>0, %<w>1, #%n2 + # + * return aarch64_output_sve_scalar_inc_dec (operands[2]); +- * return aarch64_output_sve_addvl_addpl (operands[2]);" ++ * return aarch64_output_sve_addvl_addpl (operands[2]); ++ * return aarch64_output_addsvl_addspl (operands[2]);" + ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. 
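For context, the extra "rk"/"UaV" alternative added to this add pattern is what makes ADDSVL/ADDSPL reachable from ordinary additions: the UaV constraint accepts the RDSVL-style constants classified by aarch64_addsvl_addspl_immediate_p, and aarch64_output_addsvl_addspl (defined earlier in this patch) prints the instruction. A minimal sketch of source code that could exercise this path follows; the function itself is invented for illustration, and the exact register allocation is only indicative:

    #include <arm_sme.h>

    /* In a streaming-compatible function the current vector length need
       not equal the streaming vector length (SVL), so svcntsb () is an
       RDSVL-based quantity rather than a poly_int.  Adding a small
       multiple of it to a pointer is the shape this alternative can fold
       into a single instruction, e.g. "addsvl x0, x0, #2".  */
    uint8_t *
    skip_two_svl_blocks (uint8_t *base) __arm_streaming_compatible
    {
      return base + svcntsb () * 2;
    }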
+- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") +- (set_attr "arch" "*,*,simd,*,*,sve,sve")] ++ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm,alu_imm") ++ (set_attr "arch" "*,*,simd,*,*,sve,sve,sme")] + ) + + ;; zero_extend version of above +diff --git a/gcc/config/aarch64/arm_sme.h b/gcc/config/aarch64/arm_sme.h +new file mode 100644 +index 000000000..5ddd49f57 +--- /dev/null ++++ b/gcc/config/aarch64/arm_sme.h +@@ -0,0 +1,45 @@ ++/* AArch64 SME intrinsics include file. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef _ARM_SME_H_ ++#define _ARM_SME_H_ ++ ++#include <arm_sve.h> ++#pragma GCC aarch64 "arm_sme.h" ++ ++void __arm_za_disable(void) __arm_streaming_compatible; ++ ++void *__arm_sc_memcpy(void *, const void *, __SIZE_TYPE__) ++ __arm_streaming_compatible; ++ ++void *__arm_sc_memmove(void *, const void *, __SIZE_TYPE__) ++ __arm_streaming_compatible; ++ ++void *__arm_sc_memset(void *, int, __SIZE_TYPE__) ++ __arm_streaming_compatible; ++ ++void *__arm_sc_memchr(void *, int, __SIZE_TYPE__) ++ __arm_streaming_compatible; ++ ++#endif +diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md +index 88fb9a07c..2da423779 100644 +--- a/gcc/config/aarch64/constraints.md ++++ b/gcc/config/aarch64/constraints.md +@@ -21,6 +21,9 @@ + (define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + ++(define_register_constraint "Ucj" "W12_W15_REGS" ++ "@internal r12-r15, which can be used to index ZA.") ++ + (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS" + "@internal Registers suitable for an indirect tail call") + +@@ -74,6 +77,12 @@ + a single ADDVL or ADDPL." + (match_operand 0 "aarch64_sve_addvl_addpl_immediate")) + ++(define_constraint "UaV" ++ "@internal ++ A constraint that matches a VG-based constant that can be added by ++ a single ADDSVL or ADDSPL." ++ (match_operand 0 "aarch64_addsvl_addspl_immediate")) ++ + (define_constraint "Uat" + "@internal + A constraint that matches a VG-based constant that can be added by +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index b616f5c9a..152d28f6b 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -450,6 +450,7 @@ + (define_mode_iterator VNx4SF_ONLY [VNx4SF]) + (define_mode_iterator VNx2DI_ONLY [VNx2DI]) + (define_mode_iterator VNx2DF_ONLY [VNx2DF]) ++(define_mode_iterator VNx1TI_ONLY [VNx1TI]) + + ;; All fully-packed SVE vector modes. 
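The new arm_sme.h shown above is intentionally thin: the intrinsics themselves are injected when the compiler handles the #pragma, so the header only has to declare the streaming-compatible support routines. A short usage sketch, assuming only what those declarations guarantee (the wrapper function is illustrative, not part of the patch):

    #include <arm_sme.h>

    /* The standard memcpy is not guaranteed to be streaming-compatible,
       so a streaming-compatible caller uses the __arm_sc_* variant
       declared in arm_sme.h instead of the plain C library routine.  */
    void
    copy_block (void *dst, const void *src, __SIZE_TYPE__ n)
      __arm_streaming_compatible
    {
      __arm_sc_memcpy (dst, src, n);
    }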
+ (define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI +@@ -608,6 +609,17 @@ + ;; Bfloat16 modes to which V4SF can be converted + (define_mode_iterator V4SF_TO_BF [V4BF V8BF]) + ++;; The modes used to represent different ZA access sizes. ++(define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI]) ++(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")]) ++ ++(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")]) ++ ++;; The modes for which outer product instructions are supported. ++(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) ++(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF ++ (VNx2DF "TARGET_SME_F64F64")]) ++ + ;; ------------------------------------------------------------------ + ;; Unspec enumerations for Advance SIMD. These could well go into + ;; aarch64.md but for their use in int_iterators here. +@@ -986,6 +998,28 @@ + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. + UNSPEC_BFCVT ; Used in aarch64-simd.md. + UNSPEC_FCVTXN ; Used in aarch64-simd.md. ++ ++ ;; All used in aarch64-sme.md ++ UNSPEC_SME_ADDHA ++ UNSPEC_SME_ADDVA ++ UNSPEC_SME_FMOPA ++ UNSPEC_SME_FMOPS ++ UNSPEC_SME_LD1_HOR ++ UNSPEC_SME_LD1_VER ++ UNSPEC_SME_READ_HOR ++ UNSPEC_SME_READ_VER ++ UNSPEC_SME_SMOPA ++ UNSPEC_SME_SMOPS ++ UNSPEC_SME_ST1_HOR ++ UNSPEC_SME_ST1_VER ++ UNSPEC_SME_SUMOPA ++ UNSPEC_SME_SUMOPS ++ UNSPEC_SME_UMOPA ++ UNSPEC_SME_UMOPS ++ UNSPEC_SME_USMOPA ++ UNSPEC_SME_USMOPS ++ UNSPEC_SME_WRITE_HOR ++ UNSPEC_SME_WRITE_VER + ]) + + ;; ------------------------------------------------------------------ +@@ -1115,9 +1149,15 @@ + ;; element. + (define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16") + (VNx4BI "32") (VNx2BI "64") +- (VNx16QI "8") (VNx8HI "16") +- (VNx4SI "32") (VNx2DI "64") +- (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")]) ++ (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") ++ (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") ++ (VNx8HF "16") (VNx16HF "16") (VNx32HF "16") ++ (VNx8BF "16") (VNx16BF "16") (VNx32BF "16") ++ (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") ++ (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") ++ (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") ++ (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") ++ (VNx1TI "128")]) + + ;; The number of bits in a vector container. + (define_mode_attr container_bits [(VNx16QI "8") +@@ -1243,6 +1283,7 @@ + (VNx4SF "s") (VNx2SF "s") + (VNx2DI "d") + (VNx2DF "d") ++ (VNx1TI "q") + (BF "h") (V4BF "h") (V8BF "h") + (HF "h") + (SF "s") (DF "d") +@@ -1261,6 +1302,7 @@ + (VNx4SF "w") (VNx2SF "w") + (VNx2DI "d") + (VNx2DF "d") ++ (VNx1TI "q") + (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") + (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") + (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") +@@ -2052,6 +2094,7 @@ + (VNx4SF "VNx4BI") (VNx2SF "VNx2BI") + (VNx2DI "VNx2BI") + (VNx2DF "VNx2BI") ++ (VNx1TI "VNx2BI") + (VNx32QI "VNx16BI") + (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") + (VNx16BF "VNx8BI") +@@ -2132,6 +2175,8 @@ + ;; The constraint to use for an SVE FCMLA lane index. 
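The VNx1TI entries added above (element suffix "q", 128-bit elem_bits, VNx2BI governing predicate) give the port a mode for one 128-bit ZA tile row per vector, which is what the .q forms of the ZA load/store and move instructions operate on. A hedged illustration of what that corresponds to at the source level; the wrapper and the __arm_inout("za") spelling are assumptions here (the patch's own tests express the ZA sharing through their harness):

    #include <arm_sme.h>

    /* Load one horizontal slice of the 128-bit tile view of ZA.  For
       tile 0, slice 0 this is expected to assemble to something like
       "ld1q { za0h.q[w12, 0] }, p0/z, [x0]", matching the ld1q tests
       later in this patch.  */
    void
    load_q_row (svbool_t pg, const void *ptr)
      __arm_streaming __arm_inout("za")
    {
      svld1_hor_za128 (0, 0, pg, ptr);
    }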
+ (define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")]) + ++(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")]) ++ + ;; ------------------------------------------------------------------- + ;; Code Iterators + ;; ------------------------------------------------------------------- +@@ -3159,6 +3204,20 @@ + (define_int_iterator FCMUL_OP [UNSPEC_FCMUL + UNSPEC_FCMUL_CONJ]) + ++(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER]) ++(define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER]) ++(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER]) ++(define_int_iterator SME_WRITE [UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER]) ++ ++(define_int_iterator SME_BINARY_SDI [UNSPEC_SME_ADDHA UNSPEC_SME_ADDVA]) ++ ++(define_int_iterator SME_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS ++ UNSPEC_SME_SUMOPA UNSPEC_SME_SUMOPS ++ UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS ++ UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS]) ++ ++(define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS]) ++ + ;; Iterators for atomic operations. + + (define_int_iterator ATOMIC_LDOP +@@ -3231,6 +3290,26 @@ + (UNSPEC_PMULLT "pmullt") + (UNSPEC_PMULLT_PAIR "pmullt_pair") + (UNSPEC_SMATMUL "smatmul") ++ (UNSPEC_SME_ADDHA "addha") ++ (UNSPEC_SME_ADDVA "addva") ++ (UNSPEC_SME_FMOPA "fmopa") ++ (UNSPEC_SME_FMOPS "fmops") ++ (UNSPEC_SME_LD1_HOR "ld1_hor") ++ (UNSPEC_SME_LD1_VER "ld1_ver") ++ (UNSPEC_SME_READ_HOR "read_hor") ++ (UNSPEC_SME_READ_VER "read_ver") ++ (UNSPEC_SME_SMOPA "smopa") ++ (UNSPEC_SME_SMOPS "smops") ++ (UNSPEC_SME_ST1_HOR "st1_hor") ++ (UNSPEC_SME_ST1_VER "st1_ver") ++ (UNSPEC_SME_SUMOPA "sumopa") ++ (UNSPEC_SME_SUMOPS "sumops") ++ (UNSPEC_SME_UMOPA "umopa") ++ (UNSPEC_SME_UMOPS "umops") ++ (UNSPEC_SME_USMOPA "usmopa") ++ (UNSPEC_SME_USMOPS "usmops") ++ (UNSPEC_SME_WRITE_HOR "write_hor") ++ (UNSPEC_SME_WRITE_VER "write_ver") + (UNSPEC_SQCADD90 "sqcadd90") + (UNSPEC_SQCADD270 "sqcadd270") + (UNSPEC_SQRDCMLAH "sqrdcmlah") +@@ -4000,6 +4079,15 @@ + (define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW") + (UNSPEC_WHILEWR "UNSPEC_WHILEWR")]) + ++(define_int_attr hv [(UNSPEC_SME_LD1_HOR "h") ++ (UNSPEC_SME_LD1_VER "v") ++ (UNSPEC_SME_READ_HOR "h") ++ (UNSPEC_SME_READ_VER "v") ++ (UNSPEC_SME_ST1_HOR "h") ++ (UNSPEC_SME_ST1_VER "v") ++ (UNSPEC_SME_WRITE_HOR "h") ++ (UNSPEC_SME_WRITE_VER "v")]) ++ + ;; Iterators and attributes for fpcr fpsr getter setters + + (define_int_iterator GET_FPSCR +diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md +index 1b8496c07..3ec9e9103 100644 +--- a/gcc/config/aarch64/predicates.md ++++ b/gcc/config/aarch64/predicates.md +@@ -212,11 +212,17 @@ + (and (match_code "const_poly_int") + (match_test "aarch64_add_offset_temporaries (op) == 1"))) + ++(define_predicate "aarch64_addsvl_addspl_immediate" ++ (and (match_code "const") ++ (match_test "aarch64_addsvl_addspl_immediate_p (op)"))) ++ + (define_predicate "aarch64_pluslong_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_pluslong_immediate") + (and (match_test "TARGET_SVE") +- (match_operand 0 "aarch64_sve_plus_immediate")))) ++ (match_operand 0 "aarch64_sve_plus_immediate")) ++ (and (match_test "TARGET_SME") ++ (match_operand 0 "aarch64_addsvl_addspl_immediate")))) + + (define_predicate "aarch64_pluslong_or_poly_operand" + (ior (match_operand 0 "aarch64_pluslong_operand") +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index 49731ba92..be60cc003 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ 
b/gcc/config/aarch64/t-aarch64 +@@ -63,6 +63,7 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.def \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ +@@ -72,7 +73,8 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ +- $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.cc + +@@ -113,6 +115,19 @@ aarch64-sve-builtins-sve2.o: \ + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc + ++aarch64-sve-builtins-sme.o: \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ ++ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ ++ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ ++ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc ++ + aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ + $(srcdir)/config/aarch64/iterators.md + $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2420b05d9..47fff9c90 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19480,6 +19480,10 @@ Enable the Flag Manipulation instructions Extension. + Enable the Pointer Authentication Extension. + @item sme + Enable the Scalable Matrix Extension. ++@item sme-i16i64 ++Enable the FEAT_SME_I16I64 extension to SME. ++@item sme-f64f64 ++Enable the FEAT_SME_F64F64 extension to SME. + + @end table + +diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp +new file mode 100644 +index 000000000..a9ed3a195 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp +@@ -0,0 +1,82 @@ ++# Assembly-based regression-test driver for the SME ACLE. ++# Copyright (C) 2009-2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib g++-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SME if we're not testing it already. ++if { [check_effective_target_aarch64_sme] } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++ ++# Turn off any codegen tweaks by default that may affect expected assembly. ++# Tests relying on those should turn them on explicitly. ++set sme_flags "$sme_flags -mtune=generic -moverride=tune=none" ++ ++global gcc_runtest_parallelize_limit_minor ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set old_limit_minor $gcc_runtest_parallelize_limit_minor ++ set gcc_runtest_parallelize_limit_minor 1 ++} ++ ++torture-init ++set-torture-options { ++ "-std=c++11 -O0 -g" ++ "-std=c++14 -O1 -g" ++ "-std=c++17 -Og -g" ++ "-std=c++23 -Os -g" ++ "-std=gnu++11 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" ++ "-std=gnu++23 -Ofast -g" ++} { ++ "-DTEST_FULL" ++ "-DTEST_OVERLOADS" ++} ++ ++# Main loop. ++set gcc_subdir [string replace $subdir 0 2 gcc] ++set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c] ++set save-dg-do-what-default ${dg-do-what-default} ++if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } { ++ set dg-do-what-default assemble ++} else { ++ set dg-do-what-default compile ++} ++gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf" ++set dg-do-what-default ${save-dg-do-what-default} ++ ++torture-finish ++ ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set gcc_runtest_parallelize_limit_minor $old_limit_minor ++} ++ ++# All done. ++dg-finish +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c +index 9591e3d01..f2f922d4f 100644 +--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c +@@ -4,6 +4,7 @@ + to be diagnosed. Any attempt to call the function before including + arm_sve.h will lead to a link failure. (Same for taking its address, + etc.) 
*/ +-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); ++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t) ++ __arm_streaming_compatible; + + #pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c +index f87201984..f24ef002c 100644 +--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c +@@ -2,6 +2,7 @@ + + __SVUint8_t + svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++ __arm_streaming_compatible + { + return x; + } +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c +index 1f2e4bf66..6752ea11e 100644 +--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c +@@ -2,6 +2,7 @@ + + __SVUint8_t + svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++ __arm_streaming_compatible + { + return x; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +index 0e6461fa4..23ebe5e4f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -45,3 +45,41 @@ + #ifdef __ARM_FEATURE_SVE2_SHA3 + #error Foo + #endif ++ ++#pragma GCC target "+sme" ++#ifndef __ARM_FEATURE_SME ++#error Foo ++#endif ++ ++#pragma GCC target "+sme+nofp" ++#ifdef __ARM_FEATURE_SME ++#error Foo ++#endif ++ ++#pragma GCC target "+sme+nosimd" ++#ifdef __ARM_FEATURE_SME ++#error Foo ++#endif ++ ++#pragma GCC target "+sme+nobf16" ++#ifdef __ARM_FEATURE_SME ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sme" ++#ifdef __ARM_FEATURE_SME_I16I64 ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_SME_F64F64 ++#error Foo ++#endif ++ ++#pragma GCC target "+sme-i16i64" ++#ifndef __ARM_FEATURE_SME_I16I64 ++#error Foo ++#endif ++ ++#pragma GCC target "+sme-f64f64" ++#ifndef __ARM_FEATURE_SME_F64F64 ++#error Foo ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +new file mode 100644 +index 000000000..e2d002f26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +@@ -0,0 +1,81 @@ ++# Assembly-based regression-test driver for the SME ACLE. ++# Copyright (C) 2009-2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# Initialize `dg'. 
++dg-init ++ ++# Force SME if we're not testing it already. ++if { [check_effective_target_aarch64_sme] } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++ ++# Turn off any codegen tweaks by default that may affect expected assembly. ++# Tests relying on those should turn them on explicitly. ++set sme_flags "$sme_flags -mtune=generic -moverride=tune=none" ++ ++global gcc_runtest_parallelize_limit_minor ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set old_limit_minor $gcc_runtest_parallelize_limit_minor ++ set gcc_runtest_parallelize_limit_minor 1 ++} ++ ++torture-init ++set-torture-options { ++ "-std=c90 -O0 -g" ++ "-std=c99 -Og -g" ++ "-std=c11 -Os -g" ++ "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" ++ "-std=gnu90 -O3 -g" ++ "-std=gnu23 -Ofast -g" ++} { ++ "-DTEST_FULL" ++ "-DTEST_OVERLOADS" ++} ++ ++# Main loop. ++set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c] ++set save-dg-do-what-default ${dg-do-what-default} ++if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } { ++ set dg-do-what-default assemble ++} else { ++ set dg-do-what-default compile ++} ++gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf" ++set dg-do-what-default ${save-dg-do-what-default} ++ ++torture-finish ++ ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set gcc_runtest_parallelize_limit_minor $old_limit_minor ++} ++ ++# All done. ++dg-finish +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c +new file mode 100644 +index 000000000..8dee40145 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** addha_za32_s32_0_p0_p1_z0: ++** addha za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za32_s32_0_p0_p1_z0, svint32_t, ++ svaddha_za32_s32_m (0, p0, p1, z0), ++ svaddha_za32_m (0, p0, p1, z0)) ++ ++/* ++** addha_za32_s32_0_p1_p0_z1: ++** addha za0\.s, p1/m, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za32_s32_0_p1_p0_z1, svint32_t, ++ svaddha_za32_s32_m (0, p1, p0, z1), ++ svaddha_za32_m (0, p1, p0, z1)) ++ ++/* ++** addha_za32_s32_1_p0_p1_z0: ++** addha za1\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za32_s32_1_p0_p1_z0, svint32_t, ++ svaddha_za32_s32_m (1, p0, p1, z0), ++ svaddha_za32_m (1, p0, p1, z0)) ++ ++/* ++** addha_za32_s32_3_p0_p1_z0: ++** addha za3\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za32_s32_3_p0_p1_z0, svint32_t, ++ svaddha_za32_s32_m (3, p0, p1, z0), ++ svaddha_za32_m (3, p0, p1, z0)) ++ ++/* ++** addha_za32_u32_0_p0_p1_z0: ++** addha za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za32_u32_0_p0_p1_z0, svuint32_t, ++ svaddha_za32_u32_m (0, p0, p1, z0), ++ svaddha_za32_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c +new file mode 100644 +index 000000000..363ff1aab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** addha_za64_s64_0_p0_p1_z0: ++** addha za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_0_p0_p1_z0, svint64_t, ++ svaddha_za64_s64_m (0, p0, 
p1, z0), ++ svaddha_za64_m (0, p0, p1, z0)) ++ ++/* ++** addha_za64_s64_0_p1_p0_z1: ++** addha za0\.d, p1/m, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_0_p1_p0_z1, svint64_t, ++ svaddha_za64_s64_m (0, p1, p0, z1), ++ svaddha_za64_m (0, p1, p0, z1)) ++ ++/* ++** addha_za64_s64_1_p0_p1_z0: ++** addha za1\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_1_p0_p1_z0, svint64_t, ++ svaddha_za64_s64_m (1, p0, p1, z0), ++ svaddha_za64_m (1, p0, p1, z0)) ++ ++/* ++** addha_za64_s64_7_p0_p1_z0: ++** addha za7\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_7_p0_p1_z0, svint64_t, ++ svaddha_za64_s64_m (7, p0, p1, z0), ++ svaddha_za64_m (7, p0, p1, z0)) ++ ++/* ++** addha_za64_u64_0_p0_p1_z0: ++** addha za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_u64_0_p0_p1_z0, svuint64_t, ++ svaddha_za64_u64_m (0, p0, p1, z0), ++ svaddha_za64_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c +new file mode 100644 +index 000000000..0de019ac8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** addva_za32_s32_0_p0_p1_z0: ++** addva za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_0_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (0, p0, p1, z0), ++ svaddva_za32_m (0, p0, p1, z0)) ++ ++/* ++** addva_za32_s32_0_p1_p0_z1: ++** addva za0\.s, p1/m, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_0_p1_p0_z1, svint32_t, ++ svaddva_za32_s32_m (0, p1, p0, z1), ++ svaddva_za32_m (0, p1, p0, z1)) ++ ++/* ++** addva_za32_s32_1_p0_p1_z0: ++** addva za1\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_1_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (1, p0, p1, z0), ++ svaddva_za32_m (1, p0, p1, z0)) ++ ++/* ++** addva_za32_s32_3_p0_p1_z0: ++** addva za3\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_3_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (3, p0, p1, z0), ++ svaddva_za32_m (3, p0, p1, z0)) ++ ++/* ++** addva_za32_u32_0_p0_p1_z0: ++** addva za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_u32_0_p0_p1_z0, svuint32_t, ++ svaddva_za32_u32_m (0, p0, p1, z0), ++ svaddva_za32_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c +new file mode 100644 +index 000000000..d83d4e03c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** addva_za64_s64_0_p0_p1_z0: ++** addva za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_0_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (0, p0, p1, z0), ++ svaddva_za64_m (0, p0, p1, z0)) ++ ++/* ++** addva_za64_s64_0_p1_p0_z1: ++** addva za0\.d, p1/m, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_0_p1_p0_z1, svint64_t, ++ svaddva_za64_s64_m (0, p1, p0, z1), ++ svaddva_za64_m (0, p1, p0, z1)) ++ ++/* ++** addva_za64_s64_1_p0_p1_z0: ++** addva za1\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_1_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (1, p0, p1, z0), ++ svaddva_za64_m (1, p0, p1, z0)) ++ ++/* ++** 
addva_za64_s64_7_p0_p1_z0: ++** addva za7\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_7_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (7, p0, p1, z0), ++ svaddva_za64_m (7, p0, p1, z0)) ++ ++/* ++** addva_za64_u64_0_p0_p1_z0: ++** addva za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_u64_0_p0_p1_z0, svuint64_t, ++ svaddva_za64_u64_m (0, p0, p1, z0), ++ svaddva_za64_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c +new file mode 100644 +index 000000000..e37793f9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c +@@ -0,0 +1,25 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+nosme" ++ ++/* ++** test_nosme: ++** ... ++** bl __arm_sme_state ++** lsr x0, x0, #?63 ++** ... ++*/ ++PROTO (test_nosme, int, ()) { return __arm_has_sme (); } ++ ++#pragma GCC target "+sme" ++ ++/* ++** test_sme: ++** mov w0, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_has_sme (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c +new file mode 100644 +index 000000000..ba475d67b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NON_STREAMING ++#include "test_sme_acle.h" ++ ++/* ++** test_sme: ++** mov w0, #?0 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c +new file mode 100644 +index 000000000..b88d47921 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** test_sme: ++** mov w0, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c +new file mode 100644 +index 000000000..fb3588a64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c +@@ -0,0 +1,26 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+nosme" ++ ++/* ++** test_nosme: ++** ... ++** bl __arm_sme_state ++** and w0, w0, #?1 ++** ... 
++*/ ++PROTO (test_nosme, int, ()) { return __arm_in_streaming_mode (); } ++ ++#pragma GCC target "+sme" ++ ++/* ++** test_sme: ++** mrs x([0-9]+), svcr ++** and w0, w\1, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c +new file mode 100644 +index 000000000..0a8de45be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c +@@ -0,0 +1,310 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntb_1: ++** cntb x0 ++** ret ++*/ ++PROTO (cntb_1, uint64_t, ()) { return svcntsb (); } ++ ++/* ++** cntb_2: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntb_2, uint64_t, ()) { return svcntsb () * 2; } ++ ++/* ++** cntb_3: ++** cntb x0, all, mul #3 ++** ret ++*/ ++PROTO (cntb_3, uint64_t, ()) { return svcntsb () * 3; } ++ ++/* ++** cntb_4: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntb_4, uint64_t, ()) { return svcntsb () * 4; } ++ ++/* ++** cntb_8: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntb_8, uint64_t, ()) { return svcntsb () * 8; } ++ ++/* ++** cntb_15: ++** cntb x0, all, mul #15 ++** ret ++*/ ++PROTO (cntb_15, uint64_t, ()) { return svcntsb () * 15; } ++ ++/* ++** cntb_16: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntb_16, uint64_t, ()) { return svcntsb () * 16; } ++ ++/* ++** cntb_17: ++** rdvl x0, #17 ++** ret ++*/ ++PROTO (cntb_17, uint64_t, ()) { return svcntsb () * 17; } ++ ++/* ++** cntb_31: ++** rdvl x0, #31 ++** ret ++*/ ++PROTO (cntb_31, uint64_t, ()) { return svcntsb () * 31; } ++ ++/* ++** cntb_32: ++** cntb (x[0-9]+) ++** lsl x0, \1, 5 ++** ret ++*/ ++PROTO (cntb_32, uint64_t, ()) { return svcntsb () * 32; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntb_33: ++** cntb (x[0-9]+) ++** lsl x0, \1, 5 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_33, uint64_t, ()) { return svcntsb () * 33; } ++ ++/* ++** cntb_64: ++** cntb (x[0-9]+) ++** lsl x0, \1, 6 ++** ret ++*/ ++PROTO (cntb_64, uint64_t, ()) { return svcntsb () * 64; } ++ ++/* ++** cntb_128: ++** cntb (x[0-9]+) ++** lsl x0, \1, 7 ++** ret ++*/ ++PROTO (cntb_128, uint64_t, ()) { return svcntsb () * 128; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntb_129: ++** cntb (x[0-9]+) ++** lsl x0, \1, 7 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_129, uint64_t, ()) { return svcntsb () * 129; } ++ ++/* ++** cntb_m1: ++** rdvl x0, #-1 ++** ret ++*/ ++PROTO (cntb_m1, uint64_t, ()) { return -svcntsb (); } ++ ++/* ++** cntb_m13: ++** rdvl x0, #-13 ++** ret ++*/ ++PROTO (cntb_m13, uint64_t, ()) { return -svcntsb () * 13; } ++ ++/* ++** cntb_m15: ++** rdvl x0, #-15 ++** ret ++*/ ++PROTO (cntb_m15, uint64_t, ()) { return -svcntsb () * 15; } ++ ++/* ++** cntb_m16: ++** rdvl x0, #-16 ++** ret ++*/ ++PROTO (cntb_m16, uint64_t, ()) { return -svcntsb () * 16; } ++ ++/* ++** cntb_m17: ++** rdvl x0, #-17 ++** ret ++*/ ++PROTO (cntb_m17, uint64_t, ()) { return -svcntsb () * 17; } ++ ++/* ++** cntb_m32: ++** rdvl x0, #-32 ++** ret ++*/ ++PROTO (cntb_m32, uint64_t, ()) { return -svcntsb () * 32; } ++ ++/* ++** cntb_m33: ++** rdvl x0, #-32 ++** decb x0 ++** ret ++*/ ++PROTO (cntb_m33, uint64_t, ()) { return -svcntsb () * 33; } ++ ++/* ++** cntb_m34: ++** rdvl (x[0-9]+), #-17 ++** lsl x0, \1, #?1 ++** ret ++*/ ++PROTO (cntb_m34, uint64_t, ()) { return -svcntsb () * 34; } ++ ++/* ++** cntb_m64: ++** rdvl (x[0-9]+), #-1 ++** lsl x0, \1, #?6 ++** ret ++*/ ++PROTO (cntb_m64, uint64_t, ()) { return -svcntsb () * 64; } ++ ++/* ++** incb_1: ++** incb x0 ++** ret ++*/ ++PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntsb (); } ++ ++/* ++** incb_2: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 2; } ++ ++/* ++** incb_3: ++** incb x0, all, mul #3 ++** ret ++*/ ++PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 3; } ++ ++/* ++** incb_4: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 4; } ++ ++/* ++** incb_8: ++** incb x0, all, mul #8 ++** ret ++*/ ++PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 8; } ++ ++/* ++** incb_15: ++** incb x0, all, mul #15 ++** ret ++*/ ++PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 15; } ++ ++/* ++** incb_16: ++** incb x0, all, mul #16 ++** ret ++*/ ++PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 16; } ++ ++/* ++** incb_17: ++** addvl x0, x0, #17 ++** ret ++*/ ++PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 17; } ++ ++/* ++** incb_31: ++** addvl x0, x0, #31 ++** ret ++*/ ++PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 31; } ++ ++/* ++** decb_1: ++** decb x0 ++** ret ++*/ ++PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntsb (); } ++ ++/* ++** decb_2: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 2; } ++ ++/* ++** decb_3: ++** decb x0, all, mul #3 ++** ret ++*/ ++PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 3; } ++ ++/* ++** decb_4: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 4; } ++ ++/* ++** decb_8: ++** decb x0, all, mul #8 ++** ret ++*/ ++PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 8; } ++ ++/* ++** decb_15: ++** decb x0, all, mul #15 ++** ret ++*/ ++PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 15; } ++ ++/* ++** decb_16: ++** decb x0, all, mul #16 ++** ret ++*/ ++PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 16; } ++ ++/* ++** decb_17: ++** addvl x0, x0, #-17 ++** ret ++*/ ++PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 17; } ++ 
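The expectations in cntsb_s.c, together with the remaining decb_31/decb_32 cases below, pin down the backend's encoding choice for a VL-scaled constant: CNTB/INCB/DECB take multipliers 1..16, RDVL and ADDVL take signed factors in [-32, 31], and anything beyond that is synthesized from CNTB plus a shift or an extra increment. The following standalone C program restates those ranges as a model, for illustration only; it is not the patch's implementation:

    #include <stdio.h>

    /* Model of the encoding choice for "x0 += n * VL" suggested by the
       tests in this file: INCB/DECB for multipliers 1..16, ADDVL for
       factors in [-32, 31], otherwise a multi-instruction sequence.  */
    static const char *
    encode_add_vl (long n)
    {
      if (n >= 1 && n <= 16)
        return "incb x0, all, mul #n";
      if (n >= -16 && n <= -1)
        return "decb x0, all, mul #-n";
      if (n >= -32 && n <= 31)
        return "addvl x0, x0, #n";
      return "cntb/lsl/incb sequence";
    }

    int
    main (void)
    {
      printf ("n=16  -> %s\n", encode_add_vl (16));  /* incb x0, all, mul #16 */
      printf ("n=17  -> %s\n", encode_add_vl (17));  /* addvl x0, x0, #17 */
      printf ("n=-33 -> %s\n", encode_add_vl (-33)); /* multi-insn fallback */
      return 0;
    }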
++/* ++** decb_31: ++** addvl x0, x0, #-31 ++** ret ++*/ ++PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 31; } ++ ++/* ++** decb_32: ++** addvl x0, x0, #-32 ++** ret ++*/ ++PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 32; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c +new file mode 100644 +index 000000000..9ee4c8afc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsb: ++** rdsvl x0, #1 ++** ret ++*/ ++PROTO (cntsb, uint64_t, ()) { return svcntsb (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c +new file mode 100644 +index 000000000..3bf9498e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c +@@ -0,0 +1,277 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntd_1: ++** cntd x0 ++** ret ++*/ ++PROTO (cntd_1, uint64_t, ()) { return svcntsd (); } ++ ++/* ++** cntd_2: ++** cntw x0 ++** ret ++*/ ++PROTO (cntd_2, uint64_t, ()) { return svcntsd () * 2; } ++ ++/* ++** cntd_3: ++** cntd x0, all, mul #3 ++** ret ++*/ ++PROTO (cntd_3, uint64_t, ()) { return svcntsd () * 3; } ++ ++/* ++** cntd_4: ++** cnth x0 ++** ret ++*/ ++PROTO (cntd_4, uint64_t, ()) { return svcntsd () * 4; } ++ ++/* ++** cntd_8: ++** cntb x0 ++** ret ++*/ ++PROTO (cntd_8, uint64_t, ()) { return svcntsd () * 8; } ++ ++/* ++** cntd_15: ++** cntd x0, all, mul #15 ++** ret ++*/ ++PROTO (cntd_15, uint64_t, ()) { return svcntsd () * 15; } ++ ++/* ++** cntd_16: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntd_16, uint64_t, ()) { return svcntsd () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntd_17: ++** rdvl (x[0-9]+), #17 ++** asr x0, \1, 3 ++** ret ++*/ ++PROTO (cntd_17, uint64_t, ()) { return svcntsd () * 17; } ++ ++/* ++** cntd_32: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntd_32, uint64_t, ()) { return svcntsd () * 32; } ++ ++/* ++** cntd_64: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntd_64, uint64_t, ()) { return svcntsd () * 64; } ++ ++/* ++** cntd_128: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntd_128, uint64_t, ()) { return svcntsd () * 128; } ++ ++/* ++** cntd_m1: ++** cntd (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m1, uint64_t, ()) { return -svcntsd (); } ++ ++/* ++** cntd_m13: ++** cntd (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m13, uint64_t, ()) { return -svcntsd () * 13; } ++ ++/* ++** cntd_m15: ++** cntd (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m15, uint64_t, ()) { return -svcntsd () * 15; } ++ ++/* ++** cntd_m16: ++** rdvl x0, #-2 ++** ret ++*/ ++PROTO (cntd_m16, uint64_t, ()) { return -svcntsd () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntd_m17: ++** rdvl (x[0-9]+), #-17 ++** asr x0, \1, 3 ++** ret ++*/ ++PROTO (cntd_m17, uint64_t, ()) { return -svcntsd () * 17; } ++ ++/* ++** incd_1: ++** incd x0 ++** ret ++*/ ++PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntsd (); } ++ ++/* ++** incd_2: ++** incw x0 ++** ret ++*/ ++PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 2; } ++ ++/* ++** incd_3: ++** incd x0, all, mul #3 ++** ret ++*/ ++PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 3; } ++ ++/* ++** incd_4: ++** inch x0 ++** ret ++*/ ++PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 4; } ++ ++/* ++** incd_7: ++** incd x0, all, mul #7 ++** ret ++*/ ++PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 7; } ++ ++/* ++** incd_8: ++** incb x0 ++** ret ++*/ ++PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 8; } ++ ++/* ++** incd_9: ++** incd x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 9; } ++ ++/* ++** incd_15: ++** incd x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 15; } ++ ++/* ++** incd_16: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 16; } ++ ++/* ++** incd_18: ++** incw x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 18; } ++ ++/* ++** incd_30: ++** incw x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 30; } ++ ++/* ++** decd_1: ++** decd x0 ++** ret ++*/ ++PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntsd (); } ++ ++/* ++** decd_2: ++** decw x0 ++** ret ++*/ ++PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 2; } ++ ++/* ++** decd_3: ++** decd x0, all, mul #3 ++** ret ++*/ ++PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 3; } ++ ++/* ++** decd_4: ++** dech x0 ++** ret ++*/ ++PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 4; } ++ ++/* ++** decd_7: ++** decd x0, all, mul #7 ++** ret ++*/ ++PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 7; } ++ ++/* ++** decd_8: ++** decb x0 ++** ret ++*/ ++PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 8; } ++ ++/* ++** decd_9: ++** decd x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 9; } ++ ++/* ++** decd_15: ++** decd x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 15; } ++ ++/* ++** decd_16: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 16; } ++ ++/* ++** decd_18: ++** decw x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 18; } ++ ++/* ++** decd_30: ++** decw x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c +new file mode 100644 +index 000000000..90fb374ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsd: ++** rdsvl (x[0-9]+), #1 ++** lsr x0, \1, #?3 ++** ret ++*/ ++PROTO 
(cntsd, uint64_t, ()) { return svcntsd (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c +new file mode 100644 +index 000000000..021c39a14 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c +@@ -0,0 +1,279 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cnth_1: ++** cnth x0 ++** ret ++*/ ++PROTO (cnth_1, uint64_t, ()) { return svcntsh (); } ++ ++/* ++** cnth_2: ++** cntb x0 ++** ret ++*/ ++PROTO (cnth_2, uint64_t, ()) { return svcntsh () * 2; } ++ ++/* ++** cnth_3: ++** cnth x0, all, mul #3 ++** ret ++*/ ++PROTO (cnth_3, uint64_t, ()) { return svcntsh () * 3; } ++ ++/* ++** cnth_4: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cnth_4, uint64_t, ()) { return svcntsh () * 4; } ++ ++/* ++** cnth_8: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cnth_8, uint64_t, ()) { return svcntsh () * 8; } ++ ++/* ++** cnth_15: ++** cnth x0, all, mul #15 ++** ret ++*/ ++PROTO (cnth_15, uint64_t, ()) { return svcntsh () * 15; } ++ ++/* ++** cnth_16: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cnth_16, uint64_t, ()) { return svcntsh () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cnth_17: ++** rdvl (x[0-9]+), #17 ++** asr x0, \1, 1 ++** ret ++*/ ++PROTO (cnth_17, uint64_t, ()) { return svcntsh () * 17; } ++ ++/* ++** cnth_32: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cnth_32, uint64_t, ()) { return svcntsh () * 32; } ++ ++/* ++** cnth_64: ++** cntb (x[0-9]+) ++** lsl x0, \1, 5 ++** ret ++*/ ++PROTO (cnth_64, uint64_t, ()) { return svcntsh () * 64; } ++ ++/* ++** cnth_128: ++** cntb (x[0-9]+) ++** lsl x0, \1, 6 ++** ret ++*/ ++PROTO (cnth_128, uint64_t, ()) { return svcntsh () * 128; } ++ ++/* ++** cnth_m1: ++** cnth (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m1, uint64_t, ()) { return -svcntsh (); } ++ ++/* ++** cnth_m13: ++** cnth (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m13, uint64_t, ()) { return -svcntsh () * 13; } ++ ++/* ++** cnth_m15: ++** cnth (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m15, uint64_t, ()) { return -svcntsh () * 15; } ++ ++/* ++** cnth_m16: ++** rdvl x0, #-8 ++** ret ++*/ ++PROTO (cnth_m16, uint64_t, ()) { return -svcntsh () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cnth_m17: ++** rdvl (x[0-9]+), #-17 ++** asr x0, \1, 1 ++** ret ++*/ ++PROTO (cnth_m17, uint64_t, ()) { return -svcntsh () * 17; } ++ ++/* ++** inch_1: ++** inch x0 ++** ret ++*/ ++PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcntsh (); } ++ ++/* ++** inch_2: ++** incb x0 ++** ret ++*/ ++PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 2; } ++ ++/* ++** inch_3: ++** inch x0, all, mul #3 ++** ret ++*/ ++PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 3; } ++ ++/* ++** inch_4: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 4; } ++ ++/* ++** inch_7: ++** inch x0, all, mul #7 ++** ret ++*/ ++PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 7; } ++ ++/* ++** inch_8: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 8; } ++ ++/* ++** inch_9: ++** inch x0, all, mul #9 ++** ret ++*/ ++PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 9; } ++ ++/* ++** inch_15: ++** inch x0, all, mul #15 ++** ret ++*/ ++PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 15; } ++ ++/* ++** inch_16: ++** incb x0, all, mul #8 ++** ret ++*/ ++PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 16; } ++ ++/* ++** inch_18: ++** incb x0, all, mul #9 ++** ret ++*/ ++PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 18; } ++ ++/* ++** inch_30: ++** incb x0, all, mul #15 ++** ret ++*/ ++PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 30; } ++ ++/* ++** dech_1: ++** dech x0 ++** ret ++*/ ++PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcntsh (); } ++ ++/* ++** dech_2: ++** decb x0 ++** ret ++*/ ++PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 2; } ++ ++/* ++** dech_3: ++** dech x0, all, mul #3 ++** ret ++*/ ++PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 3; } ++ ++/* ++** dech_4: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 4; } ++ ++/* ++** dech_7: ++** dech x0, all, mul #7 ++** ret ++*/ ++PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 7; } ++ ++/* ++** dech_8: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 8; } ++ ++/* ++** dech_9: ++** dech x0, all, mul #9 ++** ret ++*/ ++PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 9; } ++ ++/* ++** dech_15: ++** dech x0, all, mul #15 ++** ret ++*/ ++PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 15; } ++ ++/* ++** dech_16: ++** decb x0, all, mul #8 ++** ret ++*/ ++PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 16; } ++ ++/* ++** dech_18: ++** decb x0, all, mul #9 ++** ret ++*/ ++PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 18; } ++ ++/* ++** dech_30: ++** decb x0, all, mul #15 ++** ret ++*/ ++PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c +new file mode 100644 +index 000000000..9f6c85208 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsh: ++** rdsvl 
(x[0-9]+), #1 ++** lsr x0, \1, #?1 ++** ret ++*/ ++PROTO (cntsh, uint64_t, ()) { return svcntsh (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c +new file mode 100644 +index 000000000..c421e1b8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c +@@ -0,0 +1,278 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntw_1: ++** cntw x0 ++** ret ++*/ ++PROTO (cntw_1, uint64_t, ()) { return svcntsw (); } ++ ++/* ++** cntw_2: ++** cnth x0 ++** ret ++*/ ++PROTO (cntw_2, uint64_t, ()) { return svcntsw () * 2; } ++ ++/* ++** cntw_3: ++** cntw x0, all, mul #3 ++** ret ++*/ ++PROTO (cntw_3, uint64_t, ()) { return svcntsw () * 3; } ++ ++/* ++** cntw_4: ++** cntb x0 ++** ret ++*/ ++PROTO (cntw_4, uint64_t, ()) { return svcntsw () * 4; } ++ ++/* ++** cntw_8: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntw_8, uint64_t, ()) { return svcntsw () * 8; } ++ ++/* ++** cntw_15: ++** cntw x0, all, mul #15 ++** ret ++*/ ++PROTO (cntw_15, uint64_t, ()) { return svcntsw () * 15; } ++ ++/* ++** cntw_16: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntw_16, uint64_t, ()) { return svcntsw () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntw_17: ++** rdvl (x[0-9]+), #17 ++** asr x0, \1, 2 ++** ret ++*/ ++PROTO (cntw_17, uint64_t, ()) { return svcntsw () * 17; } ++ ++/* ++** cntw_32: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntw_32, uint64_t, ()) { return svcntsw () * 32; } ++ ++/* ++** cntw_64: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntw_64, uint64_t, ()) { return svcntsw () * 64; } ++ ++/* ++** cntw_128: ++** cntb (x[0-9]+) ++** lsl x0, \1, 5 ++** ret ++*/ ++PROTO (cntw_128, uint64_t, ()) { return svcntsw () * 128; } ++ ++/* ++** cntw_m1: ++** cntw (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m1, uint64_t, ()) { return -svcntsw (); } ++ ++/* ++** cntw_m13: ++** cntw (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m13, uint64_t, ()) { return -svcntsw () * 13; } ++ ++/* ++** cntw_m15: ++** cntw (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m15, uint64_t, ()) { return -svcntsw () * 15; } ++ ++/* ++** cntw_m16: ++** rdvl (x[0-9]+), #-4 ++** ret ++*/ ++PROTO (cntw_m16, uint64_t, ()) { return -svcntsw () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntw_m17: ++** rdvl (x[0-9]+), #-17 ++** asr x0, \1, 2 ++** ret ++*/ ++PROTO (cntw_m17, uint64_t, ()) { return -svcntsw () * 17; } ++ ++/* ++** incw_1: ++** incw x0 ++** ret ++*/ ++PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntsw (); } ++ ++/* ++** incw_2: ++** inch x0 ++** ret ++*/ ++PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 2; } ++ ++/* ++** incw_3: ++** incw x0, all, mul #3 ++** ret ++*/ ++PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 3; } ++ ++/* ++** incw_4: ++** incb x0 ++** ret ++*/ ++PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 4; } ++ ++/* ++** incw_7: ++** incw x0, all, mul #7 ++** ret ++*/ ++PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 7; } ++ ++/* ++** incw_8: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 8; } ++ ++/* ++** incw_9: ++** incw x0, all, mul #9 ++** ret ++*/ ++PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 9; } ++ ++/* ++** incw_15: ++** incw x0, all, mul #15 ++** ret ++*/ ++PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 15; } ++ ++/* ++** incw_16: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 16; } ++ ++/* ++** incw_18: ++** inch x0, all, mul #9 ++** ret ++*/ ++PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 18; } ++ ++/* ++** incw_30: ++** inch x0, all, mul #15 ++** ret ++*/ ++PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 30; } ++ ++/* ++** decw_1: ++** decw x0 ++** ret ++*/ ++PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntsw (); } ++ ++/* ++** decw_2: ++** dech x0 ++** ret ++*/ ++PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 2; } ++ ++/* ++** decw_3: ++** decw x0, all, mul #3 ++** ret ++*/ ++PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 3; } ++ ++/* ++** decw_4: ++** decb x0 ++** ret ++*/ ++PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 4; } ++ ++/* ++** decw_7: ++** decw x0, all, mul #7 ++** ret ++*/ ++PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 7; } ++ ++/* ++** decw_8: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 8; } ++ ++/* ++** decw_9: ++** decw x0, all, mul #9 ++** ret ++*/ ++PROTO (decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 9; } ++ ++/* ++** decw_15: ++** decw x0, all, mul #15 ++** ret ++*/ ++PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 15; } ++ ++/* ++** decw_16: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 16; } ++ ++/* ++** decw_18: ++** dech x0, all, mul #9 ++** ret ++*/ ++PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 18; } ++ ++/* ++** decw_30: ++** dech x0, all, mul #15 ++** ret ++*/ ++PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c +new file mode 100644 +index 000000000..75ca937c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsw: ++** rdsvl (x[0-9]+), #1 ++** lsr x0, \1, #?2 
++** ret ++*/ ++PROTO (cntsw, uint64_t, ()) { return svcntsw (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c +new file mode 100644 +index 000000000..fbbeb4f12 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c +@@ -0,0 +1,77 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za128_0_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_0_0_0, ++ svld1_hor_vnum_za128 (0, 0, p0, x1, 0), ++ svld1_hor_vnum_za128 (0, 0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_7_1_0: ++** mov (w1[2-5]), #?1 ++** ld1q { za7h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_7_1_0, ++ svld1_hor_vnum_za128 (7, 1, p0, x1, 0), ++ svld1_hor_vnum_za128 (7, 1, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_11_1_5: ++** incb x1, all, mul #5 ++** mov (w1[2-5]), #?6 ++** ld1q { za11h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_1_5, ++ svld1_hor_vnum_za128 (11, 1, p0, x1, 5), ++ svld1_hor_vnum_za128 (11, 1, p0, x1, 5)) ++ ++/* ++** ld1_vnum_za128_3_w0_0: ++** mov (w1[2-5]), w0 ++** ld1q { za3h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0, ++ svld1_hor_vnum_za128 (3, w0, p0, x1, 0), ++ svld1_hor_vnum_za128 (3, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_5_w0_0: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1q { za5h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0, ++ svld1_hor_vnum_za128 (5, w0, p0, x1, 13), ++ svld1_hor_vnum_za128 (5, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za128_11_w0_0: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1q { za11h\.q\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0, ++ svld1_hor_vnum_za128 (11, w0, p0, x1, x2), ++ svld1_hor_vnum_za128 (11, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za128_15_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** ld1q { za15h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0, ++ svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c +new file mode 100644 +index 000000000..30e7a71ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za16_1_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_0_1, ++ svld1_hor_vnum_za16 (1, 0, p0, x1, 1), ++ svld1_hor_vnum_za16 (1, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_1_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_1_1, ++ svld1_hor_vnum_za16 (1, 1, p0, x1, 1), ++ svld1_hor_vnum_za16 (1, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_0_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?8 ++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_0_8, ++ svld1_hor_vnum_za16 (0, 0, p0, x1, 8), ++ svld1_hor_vnum_za16 (0, 0, p0, x1, 8)) ++ ++/* ++** 
ld1_vnum_za16_0_1_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?9 ++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, ++ svld1_hor_vnum_za16 (0, 1, p0, x1, 8), ++ svld1_hor_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w1[2-5]), w0 ++** ld1h { za0h\.h\[\1, 7\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 7), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** ld1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w1[2-5]), w0, #?8 ++** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 8), ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 13), ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za16_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1h { za0h\.h\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za16_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, ++ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c +new file mode 100644 +index 000000000..49ffaede8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1w { za3h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, ++ svld1_hor_vnum_za32 (3, 0, p0, x1, 1), ++ svld1_hor_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1w { za2h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 1), ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?4 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, ++ svld1_hor_vnum_za32 (0, 0, p0, x1, 4), ++ svld1_hor_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?5 ++** ld1w { za2h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 4), ++ 
svld1_hor_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 3), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** ld1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w1[2-5]), w0, #?4 ++** ld1w { za1h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, ++ svld1_hor_vnum_za32 (1, w0, p0, x1, 4), ++ svld1_hor_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1w { za3h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, ++ svld1_hor_vnum_za32 (3, w0, p0, x1, 13), ++ svld1_hor_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za32_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1w { za0h\.s\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za32_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1w { za1h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, ++ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c +new file mode 100644 +index 000000000..df09b1c81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1d { za3h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, ++ svld1_hor_vnum_za64 (3, 0, p0, x1, 1), ++ svld1_hor_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1d { za7h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, ++ svld1_hor_vnum_za64 (7, 1, p0, x1, 1), ++ svld1_hor_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?2 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, ++ svld1_hor_vnum_za64 (0, 0, p0, x1, 2), ++ svld1_hor_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?3 ++** ld1d { za5h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_5_1_2, ++ svld1_hor_vnum_za64 (5, 1, p0, x1, 2), ++ svld1_hor_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, ++ svld1_hor_vnum_za64 (0, 
w0, p0, x1, 0), ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w1[2-5]), w0, #?2 ++** ld1d { za6h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, ++ svld1_hor_vnum_za64 (6, w0, p0, x1, 2), ++ svld1_hor_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1d { za2h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, ++ svld1_hor_vnum_za64 (2, w0, p0, x1, 13), ++ svld1_hor_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za64_4_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1d { za4h\.d\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, ++ svld1_hor_vnum_za64 (4, w0, p0, x1, x2), ++ svld1_hor_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za64_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1d { za1h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, ++ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c +new file mode 100644 +index 000000000..c42931d3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za8_0_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?16 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 16), ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?17 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 16), ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, ++ 
svld1_hor_vnum_za8 (0, w0, p0, x1, 15), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** ld1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 16), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_x2: ++** cntb (x[0-9]+) ++** mul (x[0-9]+), (?:\1, x2|x2, \1) ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1b { za0h\.b\[\3, 0\] }, p0/z, \[x1, \2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za8_0_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, ++ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c +new file mode 100644 +index 000000000..2c6292217 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za128_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_0, ++ svld1_hor_za128 (0, 0, p0, x1), ++ svld1_hor_za128 (0, 0, p0, x1)) ++ ++/* ++** ld1_za128_0_1: ++** mov (w1[2-5]), #?1 ++** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_1, ++ svld1_hor_za128 (0, 1, p0, x1), ++ svld1_hor_za128 (0, 1, p0, x1)) ++ ++/* ++** ld1_za128_0_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0, ++ svld1_hor_za128 (0, w0, p0, x1), ++ svld1_hor_za128 (0, w0, p0, x1)) ++ ++/* ++** ld1_za128_0_w0_p1: ++** add (w1[2-5]), w0, #?1 ++** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0_p1, ++ svld1_hor_za128 (0, w0 + 1, p0, x1), ++ svld1_hor_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za128_7_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za7h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_7_w0, ++ svld1_hor_za128 (7, w0, p0, x1), ++ svld1_hor_za128 (7, w0, p0, x1)) ++ ++/* ++** ld1_za128_13_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za13h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_13_w0, ++ svld1_hor_za128 (13, w0, p0, x1), ++ svld1_hor_za128 (13, w0, p0, x1)) ++ ++/* ++** ld1_za128_15_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za15h\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_15_w0, ++ svld1_hor_za128 (15, w0, p0, x1), ++ svld1_hor_za128 (15, w0, p0, x1)) ++ ++/* ++** ld1_za128_9_w0_index: ++** mov (w1[2-5]), w0 ++** ld1q { za9h\.q\[\1, 0\] }, p0/z, \[x1, x2, lsl #?4\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_9_w0_index, ++ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16), ++ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c +new file mode 100644 +index 000000000..3570bea61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za16_0_0: ++** mov (w1[2-5]), 
(?:wzr|#?0)
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_0,
++ svld1_hor_za16 (0, 0, p0, x1),
++ svld1_hor_za16 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 7. */
++/*
++** ld1_za16_0_7:
++** mov (w1[2-5]), #?7
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_7,
++ svld1_hor_za16 (0, 7, p0, x1),
++ svld1_hor_za16 (0, 7, p0, x1))
++
++/*
++** ld1_za16_0_8:
++** mov (w1[2-5]), #?8
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_8,
++ svld1_hor_za16 (0, 8, p0, x1),
++ svld1_hor_za16 (0, 8, p0, x1))
++
++/*
++** ld1_za16_0_w0:
++** mov (w1[2-5]), w0
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0,
++ svld1_hor_za16 (0, w0, p0, x1),
++ svld1_hor_za16 (0, w0, p0, x1))
++
++/*
++** ld1_za16_0_w0_p1:
++** mov (w1[2-5]), w0
++** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p1,
++ svld1_hor_za16 (0, w0 + 1, p0, x1),
++ svld1_hor_za16 (0, w0 + 1, p0, x1))
++
++/*
++** ld1_za16_0_w0_p7:
++** mov (w1[2-5]), w0
++** ld1h { za0h\.h\[\1, 7\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p7,
++ svld1_hor_za16 (0, w0 + 7, p0, x1),
++ svld1_hor_za16 (0, w0 + 7, p0, x1))
++
++/*
++** ld1_za16_1_w0:
++** mov (w1[2-5]), w0
++** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0,
++ svld1_hor_za16 (1, w0, p0, x1),
++ svld1_hor_za16 (1, w0, p0, x1))
++
++
++/*
++** ld1_za16_1_w0_p1:
++** mov (w1[2-5]), w0
++** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p1,
++ svld1_hor_za16 (1, w0 + 1, p0, x1),
++ svld1_hor_za16 (1, w0 + 1, p0, x1))
++
++/*
++** ld1_za16_1_w0_p7:
++** mov (w1[2-5]), w0
++** ld1h { za1h\.h\[\1, 7\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p7,
++ svld1_hor_za16 (1, w0 + 7, p0, x1),
++ svld1_hor_za16 (1, w0 + 7, p0, x1))
++
++/*
++** ld1_za16_1_w0_p5_index:
++** mov (w1[2-5]), w0
++** ld1h { za1h\.h\[\1, 5\] }, p0/z, \[x1, x2, lsl #?1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p5_index,
++ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** ld1_za16_0_w0_p8:
++** add (w1[2-5]), w0, #?8
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p8,
++ svld1_hor_za16 (0, w0 + 8, p0, x1),
++ svld1_hor_za16 (0, w0 + 8, p0, x1))
++
++/*
++** ld1_za16_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_m1,
++ svld1_hor_za16 (0, w0 - 1, p0, x1),
++ svld1_hor_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c
+new file mode 100644
+index 000000000..a8f6606bd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** ld1_za32_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za32_0_0,
++ svld1_hor_za32 (0, 0, p0, x1),
++ svld1_hor_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 3.
*/ ++/* ++** ld1_za32_0_3: ++** mov (w1[2-5]), #?3 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_3, ++ svld1_hor_za32 (0, 3, p0, x1), ++ svld1_hor_za32 (0, 3, p0, x1)) ++ ++/* ++** ld1_za32_0_4: ++** mov (w1[2-5]), #?4 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_4, ++ svld1_hor_za32 (0, 4, p0, x1), ++ svld1_hor_za32 (0, 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0: ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0, ++ svld1_hor_za32 (0, w0, p0, x1), ++ svld1_hor_za32 (0, w0, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p1, ++ svld1_hor_za32 (0, w0 + 1, p0, x1), ++ svld1_hor_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p3: ++** mov (w1[2-5]), w0 ++** ld1w { za0h\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p3, ++ svld1_hor_za32 (0, w0 + 3, p0, x1), ++ svld1_hor_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_3_w0: ++** mov (w1[2-5]), w0 ++** ld1w { za3h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0, ++ svld1_hor_za32 (3, w0, p0, x1), ++ svld1_hor_za32 (3, w0, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1w { za3h\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p1, ++ svld1_hor_za32 (3, w0 + 1, p0, x1), ++ svld1_hor_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p3: ++** mov (w1[2-5]), w0 ++** ld1w { za3h\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p3, ++ svld1_hor_za32 (3, w0 + 3, p0, x1), ++ svld1_hor_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_1_w0_p2_index: ++** mov (w1[2-5]), w0 ++** ld1w { za1h\.s\[\1, 2\] }, p0/z, \[x1, x2, lsl #?2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, ++ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** ld1_za32_0_w0_p4: ++** add (w1[2-5]), w0, #?4 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p4, ++ svld1_hor_za32 (0, w0 + 4, p0, x1), ++ svld1_hor_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_m1, ++ svld1_hor_za32 (0, w0 - 1, p0, x1), ++ svld1_hor_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c +new file mode 100644 +index 000000000..f4573eb71 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za64_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_0, ++ svld1_hor_za64 (0, 0, p0, x1), ++ svld1_hor_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. 
*/ ++/* ++** ld1_za64_0_1: ++** mov (w1[2-5]), #?1 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_1, ++ svld1_hor_za64 (0, 1, p0, x1), ++ svld1_hor_za64 (0, 1, p0, x1)) ++ ++/* ++** ld1_za64_0_2: ++** mov (w1[2-5]), #?2 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_2, ++ svld1_hor_za64 (0, 2, p0, x1), ++ svld1_hor_za64 (0, 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0: ++** mov (w1[2-5]), w0 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0, ++ svld1_hor_za64 (0, w0, p0, x1), ++ svld1_hor_za64 (0, w0, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p1, ++ svld1_hor_za64 (0, w0 + 1, p0, x1), ++ svld1_hor_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_7_w0: ++** mov (w1[2-5]), w0 ++** ld1d { za7h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0, ++ svld1_hor_za64 (7, w0, p0, x1), ++ svld1_hor_za64 (7, w0, p0, x1)) ++ ++/* ++** ld1_za64_7_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1d { za7h\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0_p1, ++ svld1_hor_za64 (7, w0 + 1, p0, x1), ++ svld1_hor_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_5_w0_p1_index: ++** mov (w1[2-5]), w0 ++** ld1d { za5h\.d\[\1, 1\] }, p0/z, \[x1, x2, lsl #?3\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, ++ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** ld1_za64_0_w0_p2: ++** add (w1[2-5]), w0, #?2 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p2, ++ svld1_hor_za64 (0, w0 + 2, p0, x1), ++ svld1_hor_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_m1, ++ svld1_hor_za64 (0, w0 - 1, p0, x1), ++ svld1_hor_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c +new file mode 100644 +index 000000000..eef0927cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za8_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_0, ++ svld1_hor_za8 (0, 0, p0, x1), ++ svld1_hor_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/ ++/* ++** ld1_za8_0_15: ++** mov (w1[2-5]), #?15 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_15, ++ svld1_hor_za8 (0, 15, p0, x1), ++ svld1_hor_za8 (0, 15, p0, x1)) ++ ++/* ++** ld1_za8_0_16: ++** mov (w1[2-5]), #?16 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_16, ++ svld1_hor_za8 (0, 16, p0, x1), ++ svld1_hor_za8 (0, 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0, ++ svld1_hor_za8 (0, w0, p0, x1), ++ svld1_hor_za8 (0, w0, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p1, ++ svld1_hor_za8 (0, w0 + 1, p0, x1), ++ svld1_hor_za8 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p15: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p15, ++ svld1_hor_za8 (0, w0 + 15, p0, x1), ++ svld1_hor_za8 (0, w0 + 15, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p13_index: ++** mov (w1[2-5]), w0 ++** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1, x2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, ++ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2), ++ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2)) ++ ++/* ++** ld1_za8_0_w0_p16: ++** add (w1[2-5]), w0, #?16 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p16, ++ svld1_hor_za8 (0, w0 + 16, p0, x1), ++ svld1_hor_za8 (0, w0 + 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_m1, ++ svld1_hor_za8 (0, w0 - 1, p0, x1), ++ svld1_hor_za8 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c +new file mode 100644 +index 000000000..e90da4b33 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c +@@ -0,0 +1,77 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za128_0_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_0_0_0, ++ svld1_ver_vnum_za128 (0, 0, p0, x1, 0), ++ svld1_ver_vnum_za128 (0, 0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_7_1_0: ++** mov (w1[2-5]), #?1 ++** ld1q { za7v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_7_1_0, ++ svld1_ver_vnum_za128 (7, 1, p0, x1, 0), ++ svld1_ver_vnum_za128 (7, 1, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_11_1_5: ++** incb x1, all, mul #5 ++** mov (w1[2-5]), #?6 ++** ld1q { za11v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_1_5, ++ svld1_ver_vnum_za128 (11, 1, p0, x1, 5), ++ svld1_ver_vnum_za128 (11, 1, p0, x1, 5)) ++ ++/* ++** ld1_vnum_za128_3_w0_0: ++** mov (w1[2-5]), w0 ++** ld1q { za3v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0, ++ svld1_ver_vnum_za128 (3, w0, p0, x1, 0), ++ svld1_ver_vnum_za128 (3, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_5_w0_0: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1q { za5v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0, ++ svld1_ver_vnum_za128 (5, w0, p0, x1, 13), ++ svld1_ver_vnum_za128 (5, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za128_11_w0_0: ++** cntb (x[0-9]+) ++** madd 
(x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1q { za11v\.q\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0, ++ svld1_ver_vnum_za128 (11, w0, p0, x1, x2), ++ svld1_ver_vnum_za128 (11, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za128_15_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** ld1q { za15v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0, ++ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c +new file mode 100644 +index 000000000..7868cf4ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za16_1_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_0_1, ++ svld1_ver_vnum_za16 (1, 0, p0, x1, 1), ++ svld1_ver_vnum_za16 (1, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_1_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_1_1, ++ svld1_ver_vnum_za16 (1, 1, p0, x1, 1), ++ svld1_ver_vnum_za16 (1, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_0_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?8 ++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_0_8, ++ svld1_ver_vnum_za16 (0, 0, p0, x1, 8), ++ svld1_ver_vnum_za16 (0, 0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_1_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?9 ++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, ++ svld1_ver_vnum_za16 (0, 1, p0, x1, 8), ++ svld1_ver_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1h { za0v\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w1[2-5]), w0 ++** ld1h { za0v\.h\[\1, 7\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 7), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** ld1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w1[2-5]), w0, #?8 ++** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 8), ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 13), ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za16_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1h { za0v\.h\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, ++ svld1_ver_vnum_za16 (0, w0, 
p0, x1, x2), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za16_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, ++ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c +new file mode 100644 +index 000000000..053b60140 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1w { za3v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, ++ svld1_ver_vnum_za32 (3, 0, p0, x1, 1), ++ svld1_ver_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1w { za2v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 1), ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?4 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, ++ svld1_ver_vnum_za32 (0, 0, p0, x1, 4), ++ svld1_ver_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?5 ++** ld1w { za2v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 4), ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 3), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** ld1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w1[2-5]), w0, #?4 ++** ld1w { za1v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, ++ svld1_ver_vnum_za32 (1, w0, p0, x1, 4), ++ svld1_ver_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1w { za3v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, ++ svld1_ver_vnum_za32 (3, w0, p0, x1, 13), ++ svld1_ver_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za32_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1w { za0v\.s\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, x2), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za32_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1w { za1v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, ++ 
svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c +new file mode 100644 +index 000000000..d04764979 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1d { za3v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, ++ svld1_ver_vnum_za64 (3, 0, p0, x1, 1), ++ svld1_ver_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1d { za7v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, ++ svld1_ver_vnum_za64 (7, 1, p0, x1, 1), ++ svld1_ver_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?2 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, ++ svld1_ver_vnum_za64 (0, 0, p0, x1, 2), ++ svld1_ver_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?3 ++** ld1d { za5v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_5_1_2, ++ svld1_ver_vnum_za64 (5, 1, p0, x1, 2), ++ svld1_ver_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1d { za0v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w1[2-5]), w0, #?2 ++** ld1d { za6v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, ++ svld1_ver_vnum_za64 (6, w0, p0, x1, 2), ++ svld1_ver_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** ld1d { za2v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, ++ svld1_ver_vnum_za64 (2, w0, p0, x1, 13), ++ svld1_ver_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za64_4_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1d { za4v\.d\[\3, 0\] }, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, ++ svld1_ver_vnum_za64 (4, w0, p0, x1, x2), ++ svld1_ver_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za64_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1d { za1v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, ++ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c +new file mode 100644 +index 000000000..e99d95e3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include 
"test_sme_acle.h" ++ ++/* ++** ld1_vnum_za8_0_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 1), ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 1), ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?16 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?17 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_0: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 15), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** ld1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_x2: ++** cntb (x[0-9]+) ++** mul (x[0-9]+), (?:\1, x2|x2, \1) ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** ld1b { za0v\.b\[\3, 0\] }, p0/z, \[x1, \2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, x2), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za8_0_w0p1_0: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, ++ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c +new file mode 100644 +index 000000000..e81f40258 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za128_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_0, ++ svld1_ver_za128 (0, 0, p0, x1), ++ svld1_ver_za128 (0, 0, p0, x1)) ++ ++/* ++** ld1_za128_0_1: ++** mov (w1[2-5]), #?1 ++** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_1, ++ svld1_ver_za128 (0, 1, p0, x1), ++ svld1_ver_za128 (0, 1, p0, x1)) ++ ++/* ++** ld1_za128_0_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za0v\.q\[\1, 0\] 
}, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0, ++ svld1_ver_za128 (0, w0, p0, x1), ++ svld1_ver_za128 (0, w0, p0, x1)) ++ ++/* ++** ld1_za128_0_w0_p1: ++** add (w1[2-5]), w0, #?1 ++** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0_p1, ++ svld1_ver_za128 (0, w0 + 1, p0, x1), ++ svld1_ver_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za128_7_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za7v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_7_w0, ++ svld1_ver_za128 (7, w0, p0, x1), ++ svld1_ver_za128 (7, w0, p0, x1)) ++ ++/* ++** ld1_za128_13_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za13v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_13_w0, ++ svld1_ver_za128 (13, w0, p0, x1), ++ svld1_ver_za128 (13, w0, p0, x1)) ++ ++/* ++** ld1_za128_15_w0: ++** mov (w1[2-5]), w0 ++** ld1q { za15v\.q\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_15_w0, ++ svld1_ver_za128 (15, w0, p0, x1), ++ svld1_ver_za128 (15, w0, p0, x1)) ++ ++/* ++** ld1_za128_9_w0_index: ++** mov (w1[2-5]), w0 ++** ld1q { za9v\.q\[\1, 0\] }, p0/z, \[x1, x2, lsl #?4\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_9_w0_index, ++ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16), ++ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c +new file mode 100644 +index 000000000..0938b1eba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za16_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_0, ++ svld1_ver_za16 (0, 0, p0, x1), ++ svld1_ver_za16 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 7. 
*/
++/*
++** ld1_za16_0_7:
++** mov (w1[2-5]), #?7
++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_7,
++ svld1_ver_za16 (0, 7, p0, x1),
++ svld1_ver_za16 (0, 7, p0, x1))
++
++/*
++** ld1_za16_0_8:
++** mov (w1[2-5]), #?8
++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_8,
++ svld1_ver_za16 (0, 8, p0, x1),
++ svld1_ver_za16 (0, 8, p0, x1))
++
++/*
++** ld1_za16_0_w0:
++** mov (w1[2-5]), w0
++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0,
++ svld1_ver_za16 (0, w0, p0, x1),
++ svld1_ver_za16 (0, w0, p0, x1))
++
++/*
++** ld1_za16_0_w0_p1:
++** mov (w1[2-5]), w0
++** ld1h { za0v\.h\[\1, 1\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p1,
++ svld1_ver_za16 (0, w0 + 1, p0, x1),
++ svld1_ver_za16 (0, w0 + 1, p0, x1))
++
++/*
++** ld1_za16_0_w0_p7:
++** mov (w1[2-5]), w0
++** ld1h { za0v\.h\[\1, 7\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p7,
++ svld1_ver_za16 (0, w0 + 7, p0, x1),
++ svld1_ver_za16 (0, w0 + 7, p0, x1))
++
++/*
++** ld1_za16_1_w0:
++** mov (w1[2-5]), w0
++** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0,
++ svld1_ver_za16 (1, w0, p0, x1),
++ svld1_ver_za16 (1, w0, p0, x1))
++
++
++/*
++** ld1_za16_1_w0_p1:
++** mov (w1[2-5]), w0
++** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p1,
++ svld1_ver_za16 (1, w0 + 1, p0, x1),
++ svld1_ver_za16 (1, w0 + 1, p0, x1))
++
++/*
++** ld1_za16_1_w0_p7:
++** mov (w1[2-5]), w0
++** ld1h { za1v\.h\[\1, 7\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p7,
++ svld1_ver_za16 (1, w0 + 7, p0, x1),
++ svld1_ver_za16 (1, w0 + 7, p0, x1))
++
++/*
++** ld1_za16_1_w0_p5_index:
++** mov (w1[2-5]), w0
++** ld1h { za1v\.h\[\1, 5\] }, p0/z, \[x1, x2, lsl #?1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_1_w0_p5_index,
++ svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++ svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** ld1_za16_0_w0_p8:
++** add (w1[2-5]), w0, #?8
++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_p8,
++ svld1_ver_za16 (0, w0 + 8, p0, x1),
++ svld1_ver_za16 (0, w0 + 8, p0, x1))
++
++/*
++** ld1_za16_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za16_0_w0_m1,
++ svld1_ver_za16 (0, w0 - 1, p0, x1),
++ svld1_ver_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c
+new file mode 100644
+index 000000000..bb9d93184
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** ld1_za32_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\]
++** ret
++*/
++TEST_LOAD_ZA (ld1_za32_0_0,
++ svld1_ver_za32 (0, 0, p0, x1),
++ svld1_ver_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 3.
*/ ++/* ++** ld1_za32_0_3: ++** mov (w1[2-5]), #?3 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_3, ++ svld1_ver_za32 (0, 3, p0, x1), ++ svld1_ver_za32 (0, 3, p0, x1)) ++ ++/* ++** ld1_za32_0_4: ++** mov (w1[2-5]), #?4 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_4, ++ svld1_ver_za32 (0, 4, p0, x1), ++ svld1_ver_za32 (0, 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0: ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0, ++ svld1_ver_za32 (0, w0, p0, x1), ++ svld1_ver_za32 (0, w0, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p1, ++ svld1_ver_za32 (0, w0 + 1, p0, x1), ++ svld1_ver_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p3: ++** mov (w1[2-5]), w0 ++** ld1w { za0v\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p3, ++ svld1_ver_za32 (0, w0 + 3, p0, x1), ++ svld1_ver_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_3_w0: ++** mov (w1[2-5]), w0 ++** ld1w { za3v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0, ++ svld1_ver_za32 (3, w0, p0, x1), ++ svld1_ver_za32 (3, w0, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1w { za3v\.s\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p1, ++ svld1_ver_za32 (3, w0 + 1, p0, x1), ++ svld1_ver_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p3: ++** mov (w1[2-5]), w0 ++** ld1w { za3v\.s\[\1, 3\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p3, ++ svld1_ver_za32 (3, w0 + 3, p0, x1), ++ svld1_ver_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_1_w0_p2_index: ++** mov (w1[2-5]), w0 ++** ld1w { za1v\.s\[\1, 2\] }, p0/z, \[x1, x2, lsl #?2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, ++ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** ld1_za32_0_w0_p4: ++** add (w1[2-5]), w0, #?4 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p4, ++ svld1_ver_za32 (0, w0 + 4, p0, x1), ++ svld1_ver_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_m1, ++ svld1_ver_za32 (0, w0 - 1, p0, x1), ++ svld1_ver_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c +new file mode 100644 +index 000000000..58d73ad06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za64_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_0, ++ svld1_ver_za64 (0, 0, p0, x1), ++ svld1_ver_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. 
*/ ++/* ++** ld1_za64_0_1: ++** mov (w1[2-5]), #?1 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_1, ++ svld1_ver_za64 (0, 1, p0, x1), ++ svld1_ver_za64 (0, 1, p0, x1)) ++ ++/* ++** ld1_za64_0_2: ++** mov (w1[2-5]), #?2 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_2, ++ svld1_ver_za64 (0, 2, p0, x1), ++ svld1_ver_za64 (0, 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0: ++** mov (w1[2-5]), w0 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0, ++ svld1_ver_za64 (0, w0, p0, x1), ++ svld1_ver_za64 (0, w0, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1d { za0v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p1, ++ svld1_ver_za64 (0, w0 + 1, p0, x1), ++ svld1_ver_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_7_w0: ++** mov (w1[2-5]), w0 ++** ld1d { za7v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0, ++ svld1_ver_za64 (7, w0, p0, x1), ++ svld1_ver_za64 (7, w0, p0, x1)) ++ ++/* ++** ld1_za64_7_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1d { za7v\.d\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0_p1, ++ svld1_ver_za64 (7, w0 + 1, p0, x1), ++ svld1_ver_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_5_w0_p1_index: ++** mov (w1[2-5]), w0 ++** ld1d { za5v\.d\[\1, 1\] }, p0/z, \[x1, x2, lsl #?3\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, ++ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** ld1_za64_0_w0_p2: ++** add (w1[2-5]), w0, #?2 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p2, ++ svld1_ver_za64 (0, w0 + 2, p0, x1), ++ svld1_ver_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_m1, ++ svld1_ver_za64 (0, w0 - 1, p0, x1), ++ svld1_ver_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c +new file mode 100644 +index 000000000..38211b211 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za8_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_0, ++ svld1_ver_za8 (0, 0, p0, x1), ++ svld1_ver_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/ ++/* ++** ld1_za8_0_15: ++** mov (w1[2-5]), #?15 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_15, ++ svld1_ver_za8 (0, 15, p0, x1), ++ svld1_ver_za8 (0, 15, p0, x1)) ++ ++/* ++** ld1_za8_0_16: ++** mov (w1[2-5]), #?16 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_16, ++ svld1_ver_za8 (0, 16, p0, x1), ++ svld1_ver_za8 (0, 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0, ++ svld1_ver_za8 (0, w0, p0, x1), ++ svld1_ver_za8 (0, w0, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p1: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p1, ++ svld1_ver_za8 (0, w0 + 1, p0, x1), ++ svld1_ver_za8 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p15: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p15, ++ svld1_ver_za8 (0, w0 + 15, p0, x1), ++ svld1_ver_za8 (0, w0 + 15, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p13_index: ++** mov (w1[2-5]), w0 ++** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1, x2\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, ++ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2), ++ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2)) ++ ++/* ++** ld1_za8_0_w0_p16: ++** add (w1[2-5]), w0, #?16 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p16, ++ svld1_ver_za8 (0, w0 + 16, p0, x1), ++ svld1_ver_za8 (0, w0 + 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_m1, ++ svld1_ver_za8 (0, w0 - 1, p0, x1), ++ svld1_ver_za8 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c +new file mode 100644 +index 000000000..90495d080 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c +@@ -0,0 +1,147 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ldr_vnum_za_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_0, ++ svldr_vnum_za (0, x1, 0), ++ svldr_vnum_za (0, x1, 0)) ++ ++/* ++** ldr_vnum_za_0_1: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_1, ++ svldr_vnum_za (0, x1, 1), ++ svldr_vnum_za (0, x1, 1)) ++ ++/* ++** ldr_vnum_za_1_0: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_0, ++ svldr_vnum_za (1, x1, 0), ++ svldr_vnum_za (1, x1, 0)) ++ ++/* ++** ldr_vnum_za_1_2: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_2, ++ svldr_vnum_za (1, x1, 2), ++ svldr_vnum_za (1, x1, 2)) ++ ++/* ++** ldr_vnum_za_w0_0: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_0, ++ svldr_vnum_za (w0, x1, 0), ++ svldr_vnum_za (w0, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0_1: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_1, ++ svldr_vnum_za (w0, x1, 1), ++ svldr_vnum_za (w0, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0_13: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 13\], \[x1, #13, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA 
(ldr_vnum_za_w0_13, ++ svldr_vnum_za (w0, x1, 13), ++ svldr_vnum_za (w0, x1, 13)) ++ ++/* ++** ldr_vnum_za_w0_15: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_15, ++ svldr_vnum_za (w0, x1, 15), ++ svldr_vnum_za (w0, x1, 15)) ++ ++/* ++** ldr_vnum_za_w0_16: ++** ( ++** add (w1[2-5]), w0, #?16 ++** incb x1, all, mul #16 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_16, ++ svldr_vnum_za (w0, x1, 16), ++ svldr_vnum_za (w0, x1, 16)) ++ ++/* ++** ldr_vnum_za_w0_m1: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** decb x1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** decb x1 ++** sub (w1[2-5]), w0, #?1 ++** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_m1, ++ svldr_vnum_za (w0, x1, -1), ++ svldr_vnum_za (w0, x1, -1)) ++ ++/* ++** ldr_vnum_za_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, ++ svldr_vnum_za (w0 + 1, x1, 0), ++ svldr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0m1_1: ++** sub (w1[2-5]), w0, #?1 ++** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, ++ svldr_vnum_za (w0 - 1, x1, 1), ++ svldr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0p2_3: ++** add (w1[2-5]), w0, #?2 ++** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, ++ svldr_vnum_za (w0 + 2, x1, 3), ++ svldr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c +new file mode 100644 +index 000000000..dfc2d139f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c +@@ -0,0 +1,148 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** ldr_vnum_za_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_0, ++ svldr_vnum_za (0, x1, 0), ++ svldr_vnum_za (0, x1, 0)) ++ ++/* ++** ldr_vnum_za_0_1: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_1, ++ svldr_vnum_za (0, x1, 1), ++ svldr_vnum_za (0, x1, 1)) ++ ++/* ++** ldr_vnum_za_1_0: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_0, ++ svldr_vnum_za (1, x1, 0), ++ svldr_vnum_za (1, x1, 0)) ++ ++/* ++** ldr_vnum_za_1_2: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_2, ++ svldr_vnum_za (1, x1, 2), ++ svldr_vnum_za (1, x1, 2)) ++ ++/* ++** ldr_vnum_za_w0_0: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_0, ++ svldr_vnum_za (w0, x1, 0), ++ svldr_vnum_za (w0, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0_1: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_1, ++ svldr_vnum_za (w0, x1, 1), ++ svldr_vnum_za (w0, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0_13: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 13\], \[x1, #13, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_13, ++ svldr_vnum_za (w0, x1, 13), ++ svldr_vnum_za (w0, x1, 
13)) ++ ++/* ++** ldr_vnum_za_w0_15: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_15, ++ svldr_vnum_za (w0, x1, 15), ++ svldr_vnum_za (w0, x1, 15)) ++ ++/* ++** ldr_vnum_za_w0_16: ++** ( ++** add (w1[2-5]), w0, #?16 ++** addsvl (x[0-9]+), x1, #16 ++** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** addsvl (x[0-9]+), x1, #16 ++** add (w1[2-5]), w0, #?16 ++** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_16, ++ svldr_vnum_za (w0, x1, 16), ++ svldr_vnum_za (w0, x1, 16)) ++ ++/* ++** ldr_vnum_za_w0_m1: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** addsvl (x[0-9]+), x1, #-1 ++** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** addsvl (x[0-9]+), x1, #-1 ++** sub (w1[2-5]), w0, #?1 ++** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_m1, ++ svldr_vnum_za (w0, x1, -1), ++ svldr_vnum_za (w0, x1, -1)) ++ ++/* ++** ldr_vnum_za_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, ++ svldr_vnum_za (w0 + 1, x1, 0), ++ svldr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0m1_1: ++** sub (w1[2-5]), w0, #?1 ++** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, ++ svldr_vnum_za (w0 - 1, x1, 1), ++ svldr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0p2_3: ++** add (w1[2-5]), w0, #?2 ++** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, ++ svldr_vnum_za (w0 + 2, x1, 3), ++ svldr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c +new file mode 100644 +index 000000000..313b3239a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c +@@ -0,0 +1,124 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ldr_za_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_0, ++ svldr_za (0, x1), ++ svldr_za (0, x1)) ++ ++/* ++** ldr_za_1: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_1, ++ svldr_za (1, x1), ++ svldr_za (1, x1)) ++ ++/* ++** ldr_za_w0: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0, ++ svldr_za (w0, x1), ++ svldr_za (w0, x1)) ++ ++/* ++** ldr_za_w0_1_vnum: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_1_vnum, ++ svldr_za (w0 + 1, x1 + svcntsb ()), ++ svldr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** ldr_za_w0_13_vnum: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 13\], \[x1, #13, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_13_vnum, ++ svldr_za (w0 + 13, x1 + svcntsb () * 13), ++ svldr_za (w0 + 13, x1 + svcntsb () * 13)) ++ ++/* ++** ldr_za_w0_15_vnum: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_15_vnum, ++ svldr_za (w0 + 15, x1 + svcntsb () * 15), ++ svldr_za (w0 + 15, x1 + svcntsb () * 15)) ++ ++/* ++** ldr_za_w0_16_vnum: ++** ( ++** add (w1[2-5]), w0, #?16 ++** incb x1, all, mul #16 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_16_vnum, 
++ svldr_za (w0 + 16, x1 + svcntsb () * 16), ++ svldr_za (w0 + 16, x1 + svcntsb () * 16)) ++ ++/* ++** ldr_za_w0_m1_vnum: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** decb x1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** decb x1 ++** sub (w1[2-5]), w0, #?1 ++** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_m1_vnum, ++ svldr_za (w0 - 1, x1 - svcntsb ()), ++ svldr_za (w0 - 1, x1 - svcntsb ())) ++ ++/* ++** ldr_za_w0p2: ++** add (w1[2-5]), w0, #?2 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0p2, ++ svldr_za (w0 + 2, x1), ++ svldr_za (w0 + 2, x1)) ++ ++/* ++** ldr_za_offset: ++** ( ++** mov (w1[2-5]), w0 ++** add (x[0-9]+), x1, #?1 ++** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** add (x[0-9]+), x1, #?1 ++** mov (w1[2-5]), w0 ++** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_offset, ++ svldr_za (w0, x1 + 1), ++ svldr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c +new file mode 100644 +index 000000000..a27be7671 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** ldr_za_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_0, ++ svldr_za (0, x1), ++ svldr_za (0, x1)) ++ ++/* ++** ldr_za_1: ++** mov (w1[2-5]), #?1 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_1, ++ svldr_za (1, x1), ++ svldr_za (1, x1)) ++ ++/* ++** ldr_za_w0: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0, ++ svldr_za (w0, x1), ++ svldr_za (w0, x1)) ++ ++/* ++** ldr_za_w0_1_vnum: ++** mov (w1[2-5]), w0 ++** ldr za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_1_vnum, ++ svldr_za (w0 + 1, x1 + svcntsb ()), ++ svldr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** ldr_za_w0p2: ++** add (w1[2-5]), w0, #?2 ++** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0p2, ++ svldr_za (w0 + 2, x1), ++ svldr_za (w0 + 2, x1)) ++ ++/* ++** ldr_za_offset: ++** ( ++** mov (w1[2-5]), w0 ++** add (x[0-9]+), x1, #?1 ++** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** add (x[0-9]+), x1, #?1 ++** mov (w1[2-5]), w0 ++** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_offset, ++ svldr_za (w0, x1 + 1), ++ svldr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c +new file mode 100644 +index 000000000..480de2c7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c +@@ -0,0 +1,102 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** mopa_za32_s8_0_p0_p1_z0_z1: ++** smopa za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_0_p0_p1_z0_z1, svint8_t, ++ svmopa_za32_s8_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_s8_0_p1_p0_z1_z0: ++** smopa za0\.s, p1/m, p0/m, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_0_p1_p0_z1_z0, svint8_t, ++ svmopa_za32_s8_m (0, p1, p0, z1, z0), ++ svmopa_za32_m (0, p1, p0, z1, z0)) ++ ++/* ++** 
mopa_za32_s8_3_p0_p1_z0_z1: ++** smopa za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_3_p0_p1_z0_z1, svint8_t, ++ svmopa_za32_s8_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_u8_0_p0_p1_z0_z1: ++** umopa za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_u8_0_p0_p1_z0_z1, svuint8_t, ++ svmopa_za32_u8_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_u8_3_p0_p1_z0_z1: ++** umopa za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_u8_3_p0_p1_z0_z1, svuint8_t, ++ svmopa_za32_u8_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_bf16_0_p0_p1_z0_z1: ++** bfmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t, ++ svmopa_za32_bf16_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_bf16_3_p0_p1_z0_z1: ++** bfmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t, ++ svmopa_za32_bf16_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f16_0_p0_p1_z0_z1: ++** fmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f16_0_p0_p1_z0_z1, svfloat16_t, ++ svmopa_za32_f16_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f16_3_p0_p1_z0_z1: ++** fmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f16_3_p0_p1_z0_z1, svfloat16_t, ++ svmopa_za32_f16_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f32_0_p0_p1_z0_z1: ++** fmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f32_0_p0_p1_z0_z1, svfloat32_t, ++ svmopa_za32_f32_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f32_3_p0_p1_z0_z1: ++** fmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f32_3_p0_p1_z0_z1, svfloat32_t, ++ svmopa_za32_f32_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c +new file mode 100644 +index 000000000..f523b9605 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c +@@ -0,0 +1,70 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** mopa_za64_s16_0_p0_p1_z0_z1: ++** smopa za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_0_p0_p1_z0_z1, svint16_t, ++ svmopa_za64_s16_m (0, p0, p1, z0, z1), ++ svmopa_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_s16_0_p1_p0_z1_z0: ++** smopa za0\.d, p1/m, p0/m, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_0_p1_p0_z1_z0, svint16_t, ++ svmopa_za64_s16_m (0, p1, p0, z1, z0), ++ svmopa_za64_m (0, p1, p0, z1, z0)) ++ ++/* ++** mopa_za64_s16_7_p0_p1_z0_z1: ++** smopa za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_7_p0_p1_z0_z1, svint16_t, ++ svmopa_za64_s16_m (7, p0, p1, z0, z1), ++ svmopa_za64_m (7, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_u16_0_p0_p1_z0_z1: ++** umopa za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_u16_0_p0_p1_z0_z1, svuint16_t, ++ svmopa_za64_u16_m (0, p0, p1, z0, z1), ++ svmopa_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** 
mopa_za64_u16_7_p0_p1_z0_z1: ++** umopa za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_u16_7_p0_p1_z0_z1, svuint16_t, ++ svmopa_za64_u16_m (7, p0, p1, z0, z1), ++ svmopa_za64_m (7, p0, p1, z0, z1)) ++ ++#pragma GCC target "+nosme-i16i64+sme-f64f64" ++ ++/* ++** mopa_za64_f64_0_p0_p1_z0_z1: ++** fmopa za0\.d, p0/m, p1/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_f64_0_p0_p1_z0_z1, svfloat64_t, ++ svmopa_za64_f64_m (0, p0, p1, z0, z1), ++ svmopa_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_f64_7_p0_p1_z0_z1: ++** fmopa za7\.d, p0/m, p1/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_f64_7_p0_p1_z0_z1, svfloat64_t, ++ svmopa_za64_f64_m (7, p0, p1, z0, z1), ++ svmopa_za64_m (7, p0, p1, z0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c +new file mode 100644 +index 000000000..63c2b80fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c +@@ -0,0 +1,102 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** mops_za32_s8_0_p0_p1_z0_z1: ++** smops za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_s8_0_p0_p1_z0_z1, svint8_t, ++ svmops_za32_s8_m (0, p0, p1, z0, z1), ++ svmops_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_s8_0_p1_p0_z1_z0: ++** smops za0\.s, p1/m, p0/m, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_s8_0_p1_p0_z1_z0, svint8_t, ++ svmops_za32_s8_m (0, p1, p0, z1, z0), ++ svmops_za32_m (0, p1, p0, z1, z0)) ++ ++/* ++** mops_za32_s8_3_p0_p1_z0_z1: ++** smops za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_s8_3_p0_p1_z0_z1, svint8_t, ++ svmops_za32_s8_m (3, p0, p1, z0, z1), ++ svmops_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_u8_0_p0_p1_z0_z1: ++** umops za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_u8_0_p0_p1_z0_z1, svuint8_t, ++ svmops_za32_u8_m (0, p0, p1, z0, z1), ++ svmops_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_u8_3_p0_p1_z0_z1: ++** umops za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_u8_3_p0_p1_z0_z1, svuint8_t, ++ svmops_za32_u8_m (3, p0, p1, z0, z1), ++ svmops_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_bf16_0_p0_p1_z0_z1: ++** bfmops za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t, ++ svmops_za32_bf16_m (0, p0, p1, z0, z1), ++ svmops_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_bf16_3_p0_p1_z0_z1: ++** bfmops za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t, ++ svmops_za32_bf16_m (3, p0, p1, z0, z1), ++ svmops_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_f16_0_p0_p1_z0_z1: ++** fmops za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_f16_0_p0_p1_z0_z1, svfloat16_t, ++ svmops_za32_f16_m (0, p0, p1, z0, z1), ++ svmops_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_f16_3_p0_p1_z0_z1: ++** fmops za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_f16_3_p0_p1_z0_z1, svfloat16_t, ++ svmops_za32_f16_m (3, p0, p1, z0, z1), ++ svmops_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mops_za32_f32_0_p0_p1_z0_z1: ++** fmops za0\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_f32_0_p0_p1_z0_z1, svfloat32_t, ++ svmops_za32_f32_m (0, p0, p1, z0, z1), ++ svmops_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** 
mops_za32_f32_3_p0_p1_z0_z1: ++** fmops za3\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za32_f32_3_p0_p1_z0_z1, svfloat32_t, ++ svmops_za32_f32_m (3, p0, p1, z0, z1), ++ svmops_za32_m (3, p0, p1, z0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c +new file mode 100644 +index 000000000..bc04c3cf7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c +@@ -0,0 +1,70 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** mops_za64_s16_0_p0_p1_z0_z1: ++** smops za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_s16_0_p0_p1_z0_z1, svint16_t, ++ svmops_za64_s16_m (0, p0, p1, z0, z1), ++ svmops_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za64_s16_0_p1_p0_z1_z0: ++** smops za0\.d, p1/m, p0/m, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_s16_0_p1_p0_z1_z0, svint16_t, ++ svmops_za64_s16_m (0, p1, p0, z1, z0), ++ svmops_za64_m (0, p1, p0, z1, z0)) ++ ++/* ++** mops_za64_s16_7_p0_p1_z0_z1: ++** smops za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_s16_7_p0_p1_z0_z1, svint16_t, ++ svmops_za64_s16_m (7, p0, p1, z0, z1), ++ svmops_za64_m (7, p0, p1, z0, z1)) ++ ++/* ++** mops_za64_u16_0_p0_p1_z0_z1: ++** umops za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_u16_0_p0_p1_z0_z1, svuint16_t, ++ svmops_za64_u16_m (0, p0, p1, z0, z1), ++ svmops_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za64_u16_7_p0_p1_z0_z1: ++** umops za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_u16_7_p0_p1_z0_z1, svuint16_t, ++ svmops_za64_u16_m (7, p0, p1, z0, z1), ++ svmops_za64_m (7, p0, p1, z0, z1)) ++ ++#pragma GCC target "+nosme-i16i64+sme-f64f64" ++ ++/* ++** mops_za64_f64_0_p0_p1_z0_z1: ++** fmops za0\.d, p0/m, p1/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_f64_0_p0_p1_z0_z1, svfloat64_t, ++ svmops_za64_f64_m (0, p0, p1, z0, z1), ++ svmops_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mops_za64_f64_7_p0_p1_z0_z1: ++** fmops za7\.d, p0/m, p1/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (mops_za64_f64_7_p0_p1_z0_z1, svfloat64_t, ++ svmops_za64_f64_m (7, p0, p1, z0, z1), ++ svmops_za64_m (7, p0, p1, z0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c +new file mode 100644 +index 000000000..c8eef3b16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c +@@ -0,0 +1,435 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za128_s8_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 0, 0), ++ z0 = svread_hor_za128_m (z0, p0, 0, 0)) ++ ++/* ++** read_za128_s8_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 0, 1), ++ z0 = svread_hor_za128_m (z0, p0, 0, 1)) ++ ++/* ++** read_za128_s8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* 
++** read_za128_s8_0_w0p1_tied: ++** add (w1[2-5]), w0, #?1 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za128_s8_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za128_s8_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za1h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 1, w0), ++ z0 = svread_hor_za128_m (z0, p0, 1, w0)) ++ ++/* ++** read_za128_s8_15_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za15h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z0, p0, 15, w0), ++ z0 = svread_hor_za128_m (z0, p0, 15, w0)) ++ ++/* ++** read_za128_s8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t, ++ z0 = svread_hor_za128_s8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, ++ z0 = svread_hor_za128_u8_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, ++ z0 = svread_hor_za128_u8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t, ++ z0 = svread_hor_za128_s16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t, ++ z0 = svread_hor_za128_s16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t, ++ z0 = svread_hor_za128_u16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova 
z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t, ++ z0 = svread_hor_za128_u16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t, ++ z0 = svread_hor_za128_f16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_hor_za128_f16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_hor_za128_bf16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_hor_za128_bf16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t, ++ z0 = svread_hor_za128_s32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t, ++ z0 = svread_hor_za128_s32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t, ++ z0 = svread_hor_za128_u32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t, ++ z0 = svread_hor_za128_u32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_hor_za128_f32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_untied: ++** ( ++** mov (w1[2-5]), 
w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_hor_za128_f32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t, ++ z0 = svread_hor_za128_s64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_untied, svint64_t, ++ z0 = svread_hor_za128_s64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t, ++ z0 = svread_hor_za128_u64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t, ++ z0 = svread_hor_za128_u64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_hor_za128_f64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0h\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0h\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_hor_za128_f64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c +new file mode 100644 +index 000000000..2e0a96591 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za16_s16_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, 0), ++ z0 = svread_hor_za16_m (z0, p0, 0, 0)) ++ ++/* ++** read_za16_s16_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, 1), ++ z0 = 
svread_hor_za16_m (z0, p0, 0, 1)) ++ ++/* ++** read_za16_s16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_s16_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za16_s16_0_w0p7_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 7\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 7), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0p8_tied: ++** add (w1[2-5]), w0, #?8 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 8), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 8)) ++ ++/* ++** read_za16_s16_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za16_s16_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za1h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0), ++ z0 = svread_hor_za16_m (z0, p0, 1, w0)) ++ ++/* ++** read_za16_s16_1_w0p7_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za1h\.h\[\1, 7\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0 + 7), ++ z0 = svread_hor_za16_m (z0, p0, 1, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0h\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t, ++ z0 = svread_hor_za16_u16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0h\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t, ++ z0 = svread_hor_za16_u16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t, ++ z0 = svread_hor_za16_f16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov 
(w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0h\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_hor_za16_f16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_hor_za16_bf16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0h\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0h\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_hor_za16_bf16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c +new file mode 100644 +index 000000000..d111b60a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c +@@ -0,0 +1,196 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za32_s32_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, 0), ++ z0 = svread_hor_za32_m (z0, p0, 0, 0)) ++ ++/* ++** read_za32_s32_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, 1)) ++ ++/* ++** read_za32_s32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_s32_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za32_s32_0_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0p4_tied: ++** add (w1[2-5]), w0, #?4 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 4), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 4)) ++ ++/* ++** read_za32_s32_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za32_s32_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za1h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0), ++ z0 = 
svread_hor_za32_m (z0, p0, 1, w0)) ++ ++/* ++** read_za32_s32_1_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za1h\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 1, w0 + 3)) ++ ++/* ++** read_za32_s32_3_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za3h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0), ++ z0 = svread_hor_za32_m (z0, p0, 3, w0)) ++ ++/* ++** read_za32_s32_3_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za3h\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 3, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, za0h\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t, ++ z0 = svread_hor_za32_u32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, za0h\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t, ++ z0 = svread_hor_za32_u32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_hor_za32_f32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0h\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, za0h\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_hor_za32_f32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c +new file mode 100644 +index 000000000..b75c531a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c +@@ -0,0 +1,186 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za64_s64_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, 0), ++ z0 = svread_hor_za64_m (z0, p0, 0, 0)) ++ ++/* ++** read_za64_s64_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** 
ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, 1)) ++ ++/* ++** read_za64_s64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_s64_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0p2_tied: ++** add (w1[2-5]), w0, #?2 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p2_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 2), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 2)) ++ ++/* ++** read_za64_s64_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za64_s64_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za1h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0), ++ z0 = svread_hor_za64_m (z0, p0, 1, w0)) ++ ++/* ++** read_za64_s64_1_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za1h\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 1, w0 + 1)) ++ ++/* ++** read_za64_s64_7_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za7h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0), ++ z0 = svread_hor_za64_m (z0, p0, 7, w0)) ++ ++/* ++** read_za64_s64_7_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za7h\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 7, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0h\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, ++ z0 = svread_hor_za64_u64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0h\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t, ++ z0 = svread_hor_za64_u64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_tied: ++** mov (w1[2-5]), 
w0 ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_hor_za64_f64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0h\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0h\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_hor_za64_f64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c +new file mode 100644 +index 000000000..0ad5a953f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c +@@ -0,0 +1,125 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za8_s8_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, 0), ++ z0 = svread_hor_za8_m (z0, p0, 0, 0)) ++ ++/* ++** read_za8_s8_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, 1)) ++ ++/* ++** read_za8_s8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0)) ++ ++/* ++** read_za8_s8_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za8_s8_0_w0p15_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\1, 15\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 15), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 15)) ++ ++/* ++** read_za8_s8_0_w0p16_tied: ++** add (w1[2-5]), w0, #?16 ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 16), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 16)) ++ ++/* ++** read_za8_s8_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za8_s8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.b, p0/m, za0h\.b\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_untied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za8_m (z1, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, ++ z0 = svread_hor_za8_u8_m 
(z0, p0, 0, w0), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0h\.b\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0h\.b\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.b, p0/m, za0h\.b\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, ++ z0 = svread_hor_za8_u8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za8_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c +new file mode 100644 +index 000000000..93d5d60ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c +@@ -0,0 +1,435 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za128_s8_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, 0), ++ z0 = svread_ver_za128_m (z0, p0, 0, 0)) ++ ++/* ++** read_za128_s8_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, 1)) ++ ++/* ++** read_za128_s8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s8_0_w0p1_tied: ++** add (w1[2-5]), w0, #?1 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za128_s8_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za128_s8_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za1v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 1, w0), ++ z0 = svread_ver_za128_m (z0, p0, 1, w0)) ++ ++/* ++** read_za128_s8_15_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za15v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 15, w0), ++ z0 = svread_ver_za128_m (z0, p0, 15, w0)) ++ ++/* ++** read_za128_s8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, ++ z0 = svread_ver_za128_u8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_untied: ++** 
( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, ++ z0 = svread_ver_za128_u8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t, ++ z0 = svread_ver_za128_s16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t, ++ z0 = svread_ver_za128_s16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t, ++ z0 = svread_ver_za128_u16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t, ++ z0 = svread_ver_za128_u16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t, ++ z0 = svread_ver_za128_f16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_ver_za128_f16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_ver_za128_bf16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_ver_za128_bf16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ 
++TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t, ++ z0 = svread_ver_za128_s32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t, ++ z0 = svread_ver_za128_s32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t, ++ z0 = svread_ver_za128_u32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t, ++ z0 = svread_ver_za128_u32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_ver_za128_f32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_ver_za128_f32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t, ++ z0 = svread_ver_za128_s64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_untied, svint64_t, ++ z0 = svread_ver_za128_s64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t, ++ z0 = svread_ver_za128_u64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t, ++ z0 = 
svread_ver_za128_u64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_ver_za128_f64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.q, p0/m, za0v\.q\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.q, p0/m, za0v\.q\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_ver_za128_f64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c +new file mode 100644 +index 000000000..d0353dce6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za16_s16_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, 0), ++ z0 = svread_ver_za16_m (z0, p0, 0, 0)) ++ ++/* ++** read_za16_s16_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, 1), ++ z0 = svread_ver_za16_m (z0, p0, 0, 1)) ++ ++/* ++** read_za16_s16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_s16_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za16_s16_0_w0p7_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 7\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 7), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0p8_tied: ++** add (w1[2-5]), w0, #?8 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 8), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 8)) ++ ++/* ++** read_za16_s16_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za16_s16_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za1v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 1, w0), ++ z0 = svread_ver_za16_m (z0, p0, 1, w0)) ++ ++/* ++** read_za16_s16_1_w0p7_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za1v\.h\[\1, 7\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z0, p0, 
1, w0 + 7), ++ z0 = svread_ver_za16_m (z0, p0, 1, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0v\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t, ++ z0 = svread_ver_za16_s16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t, ++ z0 = svread_ver_za16_u16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0v\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t, ++ z0 = svread_ver_za16_u16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t, ++ z0 = svread_ver_za16_f16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0v\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_ver_za16_f16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_ver_za16_bf16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0v\.h\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.h, p0/m, za0v\.h\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.h, p0/m, za0v\.h\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_ver_za16_bf16_m (z1, p0, 0, w0), ++ z0 = svread_ver_za16_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c +new file mode 100644 +index 000000000..362e818ee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c +@@ -0,0 +1,196 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za32_s32_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, 0), ++ z0 = svread_ver_za32_m (z0, p0, 0, 0)) ++ ++/* ++** read_za32_s32_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ 
++TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, 1), ++ z0 = svread_ver_za32_m (z0, p0, 0, 1)) ++ ++/* ++** read_za32_s32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_s32_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za32_s32_0_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 3), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0p4_tied: ++** add (w1[2-5]), w0, #?4 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 4), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 4)) ++ ++/* ++** read_za32_s32_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za32_s32_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za1v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0), ++ z0 = svread_ver_za32_m (z0, p0, 1, w0)) ++ ++/* ++** read_za32_s32_1_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za1v\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0 + 3), ++ z0 = svread_ver_za32_m (z0, p0, 1, w0 + 3)) ++ ++/* ++** read_za32_s32_3_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za3v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0), ++ z0 = svread_ver_za32_m (z0, p0, 3, w0)) ++ ++/* ++** read_za32_s32_3_w0p3_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za3v\.s\[\1, 3\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0 + 3), ++ z0 = svread_ver_za32_m (z0, p0, 3, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, za0v\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t, ++ z0 = svread_ver_za32_s32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t, ++ z0 = svread_ver_za32_u32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, 
za0v\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t, ++ z0 = svread_ver_za32_u32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_ver_za32_f32_m (z0, p0, 0, w0), ++ z0 = svread_ver_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0v\.s\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.s, p0/m, za0v\.s\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.s, p0/m, za0v\.s\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_ver_za32_f32_m (z1, p0, 0, w0), ++ z0 = svread_ver_za32_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c +new file mode 100644 +index 000000000..dba3c6ffa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c +@@ -0,0 +1,186 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za64_s64_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, 0), ++ z0 = svread_ver_za64_m (z0, p0, 0, 0)) ++ ++/* ++** read_za64_s64_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, 1)) ++ ++/* ++** read_za64_s64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_s64_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0p2_tied: ++** add (w1[2-5]), w0, #?2 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p2_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 2), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 2)) ++ ++/* ++** read_za64_s64_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za64_s64_1_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za1v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0), ++ z0 = svread_ver_za64_m (z0, p0, 1, w0)) ++ ++/* ++** read_za64_s64_1_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za1v\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 1, w0 + 1)) ++ ++/* ++** read_za64_s64_7_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za7v\.d\[\1, 
0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0), ++ z0 = svread_ver_za64_m (z0, p0, 7, w0)) ++ ++/* ++** read_za64_s64_7_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za7v\.d\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 7, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0v\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, ++ z0 = svread_ver_za64_u64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0v\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t, ++ z0 = svread_ver_za64_u64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_ver_za64_f64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0v\.d\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.d, p0/m, za0v\.d\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.d, p0/m, za0v\.d\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_ver_za64_f64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za64_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c +new file mode 100644 +index 000000000..87564d1fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c +@@ -0,0 +1,125 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za8_s8_0_0_tied: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, 0), ++ z0 = svread_ver_za8_m (z0, p0, 0, 0)) ++ ++/* ++** read_za8_s8_0_1_tied: ++** mov (w1[2-5]), #?1 ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, 1), ++ z0 = svread_ver_za8_m (z0, p0, 0, 1)) ++ ++/* ++** read_za8_s8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0)) ++ ++/* ++** 
read_za8_s8_0_w0p1_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\1, 1\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za8_s8_0_w0p15_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\1, 15\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 15), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 15)) ++ ++/* ++** read_za8_s8_0_w0p16_tied: ++** add (w1[2-5]), w0, #?16 ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 16), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 16)) ++ ++/* ++** read_za8_s8_0_w0m1_tied: ++** sub (w1[2-5]), w0, #?1 ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za8_s8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.b, p0/m, za0v\.b\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_untied, svint8_t, ++ z0 = svread_ver_za8_s8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za8_m (z1, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_tied: ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, ++ z0 = svread_ver_za8_u8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za8_m (z0, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_untied: ++** ( ++** mov (w1[2-5]), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0v\.b\[\1, 0\] ++** | ++** mov z0\.d, z1\.d ++** mov (w1[2-5]), w0 ++** mova z0\.b, p0/m, za0v\.b\[\2, 0\] ++** | ++** mov (w1[2-5]), w0 ++** mova z1\.b, p0/m, za0v\.b\[\3, 0\] ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, ++ z0 = svread_ver_za8_u8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za8_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c +new file mode 100644 +index 000000000..057b6f21e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c +@@ -0,0 +1,77 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za128_0_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_0_0_0, ++ svst1_hor_vnum_za128 (0, 0, p0, x1, 0), ++ svst1_hor_vnum_za128 (0, 0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za128_7_1_0: ++** mov (w1[2-5]), #?1 ++** st1q { za7h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_7_1_0, ++ svst1_hor_vnum_za128 (7, 1, p0, x1, 0), ++ svst1_hor_vnum_za128 (7, 1, p0, x1, 0)) ++ ++/* ++** st1_vnum_za128_11_1_5: ++** incb x1, all, mul #5 ++** mov (w1[2-5]), #?6 ++** st1q { za11h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_11_1_5, ++ svst1_hor_vnum_za128 (11, 1, p0, x1, 5), ++ svst1_hor_vnum_za128 (11, 1, p0, x1, 5)) ++ ++/* ++** st1_vnum_za128_3_w0_0: ++** mov (w1[2-5]), w0 ++** st1q { za3h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA 
(st1_vnum_za128_3_w0_0, ++ svst1_hor_vnum_za128 (3, w0, p0, x1, 0), ++ svst1_hor_vnum_za128 (3, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za128_5_w0_0: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1q { za5h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_5_w0_0, ++ svst1_hor_vnum_za128 (5, w0, p0, x1, 13), ++ svst1_hor_vnum_za128 (5, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za128_11_w0_0: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1q { za11h\.q\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_11_w0_0, ++ svst1_hor_vnum_za128 (11, w0, p0, x1, x2), ++ svst1_hor_vnum_za128 (11, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za128_15_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** st1q { za15h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0, ++ svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0), ++ svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c +new file mode 100644 +index 000000000..0b57dda0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za16_1_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_0_1, ++ svst1_hor_vnum_za16 (1, 0, p0, x1, 1), ++ svst1_hor_vnum_za16 (1, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_1_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_1_1, ++ svst1_hor_vnum_za16 (1, 1, p0, x1, 1), ++ svst1_hor_vnum_za16 (1, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_0_0_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?8 ++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_0_8, ++ svst1_hor_vnum_za16 (0, 0, p0, x1, 8), ++ svst1_hor_vnum_za16 (0, 0, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_0_1_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?9 ++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_1_8, ++ svst1_hor_vnum_za16 (0, 1, p0, x1, 8), ++ svst1_hor_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_0, ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 0), ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_1, ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 1), ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w1[2-5]), w0 ++** st1h { za0h\.h\[\1, 7\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_7, ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 7), ++ svst1_hor_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** st1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w1[2-5]), w0, #?8 ++** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0_8, ++ svst1_hor_vnum_za16 (1, w0, p0, x1, 8), ++ svst1_hor_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** 
st1h { za1h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0_13, ++ svst1_hor_vnum_za16 (1, w0, p0, x1, 13), ++ svst1_hor_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za16_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1h { za0h\.h\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_x2, ++ svst1_hor_vnum_za16 (0, w0, p0, x1, x2), ++ svst1_hor_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za16_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0p1_0, ++ svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c +new file mode 100644 +index 000000000..d4381182f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1w { za3h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_3_0_1, ++ svst1_hor_vnum_za32 (3, 0, p0, x1, 1), ++ svst1_hor_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1w { za2h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_2_1_1, ++ svst1_hor_vnum_za32 (2, 1, p0, x1, 1), ++ svst1_hor_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?4 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_0_4, ++ svst1_hor_vnum_za32 (0, 0, p0, x1, 4), ++ svst1_hor_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?5 ++** st1w { za2h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_2_1_4, ++ svst1_hor_vnum_za32 (2, 1, p0, x1, 4), ++ svst1_hor_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_0, ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 0), ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_1, ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 1), ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_3, ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 3), ++ svst1_hor_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** st1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w1[2-5]), w0, #?4 ++** st1w { za1h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_1_w0_4, ++ svst1_hor_vnum_za32 (1, w0, p0, x1, 4), ++ svst1_hor_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1w { za3h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_3_w0_13, ++ svst1_hor_vnum_za32 (3, w0, p0, x1, 13), ++ svst1_hor_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za32_0_w0_x2: ++** cntb 
(x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1w { za0h\.s\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_x2, ++ svst1_hor_vnum_za32 (0, w0, p0, x1, x2), ++ svst1_hor_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za32_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1w { za1h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0, ++ svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0), ++ svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c +new file mode 100644 +index 000000000..be6063712 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1d { za3h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_3_0_1, ++ svst1_hor_vnum_za64 (3, 0, p0, x1, 1), ++ svst1_hor_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1d { za7h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_7_1_1, ++ svst1_hor_vnum_za64 (7, 1, p0, x1, 1), ++ svst1_hor_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?2 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_0_2, ++ svst1_hor_vnum_za64 (0, 0, p0, x1, 2), ++ svst1_hor_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?3 ++** st1d { za5h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_5_1_2, ++ svst1_hor_vnum_za64 (5, 1, p0, x1, 2), ++ svst1_hor_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_w0_0, ++ svst1_hor_vnum_za64 (0, w0, p0, x1, 0), ++ svst1_hor_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_w0_1, ++ svst1_hor_vnum_za64 (0, w0, p0, x1, 1), ++ svst1_hor_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w1[2-5]), w0, #?2 ++** st1d { za6h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_6_w0_2, ++ svst1_hor_vnum_za64 (6, w0, p0, x1, 2), ++ svst1_hor_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1d { za2h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_2_w0_13, ++ svst1_hor_vnum_za64 (2, w0, p0, x1, 13), ++ svst1_hor_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za64_4_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1d { za4h\.d\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_4_w0_x2, ++ svst1_hor_vnum_za64 (4, w0, p0, x1, x2), ++ svst1_hor_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za64_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1d { za1h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0, ++ svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ 
svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c +new file mode 100644 +index 000000000..eed41d25e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za8_0_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_0_1, ++ svst1_hor_vnum_za8 (0, 0, p0, x1, 1), ++ svst1_hor_vnum_za8 (0, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_1_1, ++ svst1_hor_vnum_za8 (0, 1, p0, x1, 1), ++ svst1_hor_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?16 ++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_0_16, ++ svst1_hor_vnum_za8 (0, 0, p0, x1, 16), ++ svst1_hor_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?17 ++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_1_16, ++ svst1_hor_vnum_za8 (0, 1, p0, x1, 16), ++ svst1_hor_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_0, ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 0), ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_1, ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 1), ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w1[2-5]), w0 ++** st1b { za0h\.b\[\1, 15\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_15, ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 15), ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** st1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_16, ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 16), ++ svst1_hor_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_w0_x2: ++** cntb (x[0-9]+) ++** mul (x[0-9]+), (?:\1, x2|x2, \1) ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1b { za0h\.b\[\3, 0\] }, p0, \[x1, \2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_x2, ++ svst1_hor_vnum_za8 (0, w0, p0, x1, x2), ++ svst1_hor_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za8_0_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0, ++ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c +new file mode 100644 +index 000000000..5f3d613d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za128_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1q { za0h\.q\[\1, 0\] }, 
p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_0, ++ svst1_hor_za128 (0, 0, p0, x1), ++ svst1_hor_za128 (0, 0, p0, x1)) ++ ++/* ++** st1_za128_0_1: ++** mov (w1[2-5]), #?1 ++** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_1, ++ svst1_hor_za128 (0, 1, p0, x1), ++ svst1_hor_za128 (0, 1, p0, x1)) ++ ++/* ++** st1_za128_0_w0: ++** mov (w1[2-5]), w0 ++** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_w0, ++ svst1_hor_za128 (0, w0, p0, x1), ++ svst1_hor_za128 (0, w0, p0, x1)) ++ ++/* ++** st1_za128_0_w0_p1: ++** add (w1[2-5]), w0, #?1 ++** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_w0_p1, ++ svst1_hor_za128 (0, w0 + 1, p0, x1), ++ svst1_hor_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za128_7_w0: ++** mov (w1[2-5]), w0 ++** st1q { za7h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_7_w0, ++ svst1_hor_za128 (7, w0, p0, x1), ++ svst1_hor_za128 (7, w0, p0, x1)) ++ ++/* ++** st1_za128_13_w0: ++** mov (w1[2-5]), w0 ++** st1q { za13h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_13_w0, ++ svst1_hor_za128 (13, w0, p0, x1), ++ svst1_hor_za128 (13, w0, p0, x1)) ++ ++/* ++** st1_za128_15_w0: ++** mov (w1[2-5]), w0 ++** st1q { za15h\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_15_w0, ++ svst1_hor_za128 (15, w0, p0, x1), ++ svst1_hor_za128 (15, w0, p0, x1)) ++ ++/* ++** st1_za128_9_w0_index: ++** mov (w1[2-5]), w0 ++** st1q { za9h\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_9_w0_index, ++ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16), ++ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c +new file mode 100644 +index 000000000..206306b23 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za16_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za16_0_0, ++ svst1_hor_za16 (0, 0, p0, x1), ++ svst1_hor_za16 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 7. 
*/
++/*
++** st1_za16_0_7:
++** mov (w1[2-5]), #?7
++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_7,
++ svst1_hor_za16 (0, 7, p0, x1),
++ svst1_hor_za16 (0, 7, p0, x1))
++
++/*
++** st1_za16_0_8:
++** mov (w1[2-5]), #?8
++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_8,
++ svst1_hor_za16 (0, 8, p0, x1),
++ svst1_hor_za16 (0, 8, p0, x1))
++
++/*
++** st1_za16_0_w0:
++** mov (w1[2-5]), w0
++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0,
++ svst1_hor_za16 (0, w0, p0, x1),
++ svst1_hor_za16 (0, w0, p0, x1))
++
++/*
++** st1_za16_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p1,
++ svst1_hor_za16 (0, w0 + 1, p0, x1),
++ svst1_hor_za16 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za16_0_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za0h\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p7,
++ svst1_hor_za16 (0, w0 + 7, p0, x1),
++ svst1_hor_za16 (0, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0:
++** mov (w1[2-5]), w0
++** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0,
++ svst1_hor_za16 (1, w0, p0, x1),
++ svst1_hor_za16 (1, w0, p0, x1))
++
++
++/*
++** st1_za16_1_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p1,
++ svst1_hor_za16 (1, w0 + 1, p0, x1),
++ svst1_hor_za16 (1, w0 + 1, p0, x1))
++
++/*
++** st1_za16_1_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za1h\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p7,
++ svst1_hor_za16 (1, w0 + 7, p0, x1),
++ svst1_hor_za16 (1, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0_p5_index:
++** mov (w1[2-5]), w0
++** st1h { za1h\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p5_index,
++ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** st1_za16_0_w0_p8:
++** add (w1[2-5]), w0, #?8
++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p8,
++ svst1_hor_za16 (0, w0 + 8, p0, x1),
++ svst1_hor_za16 (0, w0 + 8, p0, x1))
++
++/*
++** st1_za16_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_m1,
++ svst1_hor_za16 (0, w0 - 1, p0, x1),
++ svst1_hor_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c
+new file mode 100644
+index 000000000..ed9b2b2e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za32_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_0,
++ svst1_hor_za32 (0, 0, p0, x1),
++ svst1_hor_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 3.
*/ ++/* ++** st1_za32_0_3: ++** mov (w1[2-5]), #?3 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_3, ++ svst1_hor_za32 (0, 3, p0, x1), ++ svst1_hor_za32 (0, 3, p0, x1)) ++ ++/* ++** st1_za32_0_4: ++** mov (w1[2-5]), #?4 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_4, ++ svst1_hor_za32 (0, 4, p0, x1), ++ svst1_hor_za32 (0, 4, p0, x1)) ++ ++/* ++** st1_za32_0_w0: ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0, ++ svst1_hor_za32 (0, w0, p0, x1), ++ svst1_hor_za32 (0, w0, p0, x1)) ++ ++/* ++** st1_za32_0_w0_p1: ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p1, ++ svst1_hor_za32 (0, w0 + 1, p0, x1), ++ svst1_hor_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za32_0_w0_p3: ++** mov (w1[2-5]), w0 ++** st1w { za0h\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p3, ++ svst1_hor_za32 (0, w0 + 3, p0, x1), ++ svst1_hor_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** st1_za32_3_w0: ++** mov (w1[2-5]), w0 ++** st1w { za3h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0, ++ svst1_hor_za32 (3, w0, p0, x1), ++ svst1_hor_za32 (3, w0, p0, x1)) ++ ++/* ++** st1_za32_3_w0_p1: ++** mov (w1[2-5]), w0 ++** st1w { za3h\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0_p1, ++ svst1_hor_za32 (3, w0 + 1, p0, x1), ++ svst1_hor_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** st1_za32_3_w0_p3: ++** mov (w1[2-5]), w0 ++** st1w { za3h\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0_p3, ++ svst1_hor_za32 (3, w0 + 3, p0, x1), ++ svst1_hor_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** st1_za32_1_w0_p2_index: ++** mov (w1[2-5]), w0 ++** st1w { za1h\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_1_w0_p2_index, ++ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** st1_za32_0_w0_p4: ++** add (w1[2-5]), w0, #?4 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p4, ++ svst1_hor_za32 (0, w0 + 4, p0, x1), ++ svst1_hor_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** st1_za32_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_m1, ++ svst1_hor_za32 (0, w0 - 1, p0, x1), ++ svst1_hor_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c +new file mode 100644 +index 000000000..3600f5b8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za64_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_0, ++ svst1_hor_za64 (0, 0, p0, x1), ++ svst1_hor_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. 
*/ ++/* ++** st1_za64_0_1: ++** mov (w1[2-5]), #?1 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_1, ++ svst1_hor_za64 (0, 1, p0, x1), ++ svst1_hor_za64 (0, 1, p0, x1)) ++ ++/* ++** st1_za64_0_2: ++** mov (w1[2-5]), #?2 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_2, ++ svst1_hor_za64 (0, 2, p0, x1), ++ svst1_hor_za64 (0, 2, p0, x1)) ++ ++/* ++** st1_za64_0_w0: ++** mov (w1[2-5]), w0 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0, ++ svst1_hor_za64 (0, w0, p0, x1), ++ svst1_hor_za64 (0, w0, p0, x1)) ++ ++/* ++** st1_za64_0_w0_p1: ++** mov (w1[2-5]), w0 ++** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_p1, ++ svst1_hor_za64 (0, w0 + 1, p0, x1), ++ svst1_hor_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za64_7_w0: ++** mov (w1[2-5]), w0 ++** st1d { za7h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_7_w0, ++ svst1_hor_za64 (7, w0, p0, x1), ++ svst1_hor_za64 (7, w0, p0, x1)) ++ ++/* ++** st1_za64_7_w0_p1: ++** mov (w1[2-5]), w0 ++** st1d { za7h\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_7_w0_p1, ++ svst1_hor_za64 (7, w0 + 1, p0, x1), ++ svst1_hor_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** st1_za64_5_w0_p1_index: ++** mov (w1[2-5]), w0 ++** st1d { za5h\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_5_w0_p1_index, ++ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** st1_za64_0_w0_p2: ++** add (w1[2-5]), w0, #?2 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_p2, ++ svst1_hor_za64 (0, w0 + 2, p0, x1), ++ svst1_hor_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** st1_za64_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_m1, ++ svst1_hor_za64 (0, w0 - 1, p0, x1), ++ svst1_hor_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c +new file mode 100644 +index 000000000..9026fae9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za8_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za8_0_0, ++ svst1_hor_za8 (0, 0, p0, x1), ++ svst1_hor_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/
++/*
++** st1_za8_0_15:
++** mov (w1[2-5]), #?15
++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_15,
++ svst1_hor_za8 (0, 15, p0, x1),
++ svst1_hor_za8 (0, 15, p0, x1))
++
++/*
++** st1_za8_0_16:
++** mov (w1[2-5]), #?16
++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_16,
++ svst1_hor_za8 (0, 16, p0, x1),
++ svst1_hor_za8 (0, 16, p0, x1))
++
++/*
++** st1_za8_0_w0:
++** mov (w1[2-5]), w0
++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0,
++ svst1_hor_za8 (0, w0, p0, x1),
++ svst1_hor_za8 (0, w0, p0, x1))
++
++/*
++** st1_za8_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p1,
++ svst1_hor_za8 (0, w0 + 1, p0, x1),
++ svst1_hor_za8 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za8_0_w0_p15:
++** mov (w1[2-5]), w0
++** st1b { za0h\.b\[\1, 15\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p15,
++ svst1_hor_za8 (0, w0 + 15, p0, x1),
++ svst1_hor_za8 (0, w0 + 15, p0, x1))
++
++/*
++** st1_za8_0_w0_p13_index:
++** mov (w1[2-5]), w0
++** st1b { za0h\.b\[\1, 13\] }, p0, \[x1, x2\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p13_index,
++ svst1_hor_za8 (0, w0 + 13, p0, x1 + x2),
++ svst1_hor_za8 (0, w0 + 13, p0, x1 + x2))
++
++/*
++** st1_za8_0_w0_p16:
++** add (w1[2-5]), w0, #?16
++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p16,
++ svst1_hor_za8 (0, w0 + 16, p0, x1),
++ svst1_hor_za8 (0, w0 + 16, p0, x1))
++
++/*
++** st1_za8_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_m1,
++ svst1_hor_za8 (0, w0 - 1, p0, x1),
++ svst1_hor_za8 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c
+new file mode 100644
+index 000000000..210687a48
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c
+@@ -0,0 +1,77 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za128_0_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_0_0_0,
++ svst1_ver_vnum_za128 (0, 0, p0, x1, 0),
++ svst1_ver_vnum_za128 (0, 0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_7_1_0:
++** mov (w1[2-5]), #?1
++** st1q { za7v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_7_1_0,
++ svst1_ver_vnum_za128 (7, 1, p0, x1, 0),
++ svst1_ver_vnum_za128 (7, 1, p0, x1, 0))
++
++/*
++** st1_vnum_za128_11_1_5:
++** incb x1, all, mul #5
++** mov (w1[2-5]), #?6
++** st1q { za11v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_11_1_5,
++ svst1_ver_vnum_za128 (11, 1, p0, x1, 5),
++ svst1_ver_vnum_za128 (11, 1, p0, x1, 5))
++
++/*
++** st1_vnum_za128_3_w0_0:
++** mov (w1[2-5]), w0
++** st1q { za3v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_3_w0_0,
++ svst1_ver_vnum_za128 (3, w0, p0, x1, 0),
++ svst1_ver_vnum_za128 (3, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_5_w0_0:
++** incb x1, all, mul #13
++** add (w1[2-5]), w0, #?13
++** st1q { za5v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_5_w0_0,
++ svst1_ver_vnum_za128 (5, w0, p0, x1, 13),
++ svst1_ver_vnum_za128 (5, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za128_11_w0_0:
++** cntb (x[0-9]+)
++** madd (x[0-9]+), (?:\1,
x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1q { za11v\.q\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_11_w0_0, ++ svst1_ver_vnum_za128 (11, w0, p0, x1, x2), ++ svst1_ver_vnum_za128 (11, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za128_15_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0, ++ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0), ++ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c +new file mode 100644 +index 000000000..f75a22402 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za16_1_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_0_1, ++ svst1_ver_vnum_za16 (1, 0, p0, x1, 1), ++ svst1_ver_vnum_za16 (1, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_1_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_1_1, ++ svst1_ver_vnum_za16 (1, 1, p0, x1, 1), ++ svst1_ver_vnum_za16 (1, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_0_0_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?8 ++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_0_8, ++ svst1_ver_vnum_za16 (0, 0, p0, x1, 8), ++ svst1_ver_vnum_za16 (0, 0, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_0_1_8: ++** incb x1, all, mul #8 ++** mov (w1[2-5]), #?9 ++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_1_8, ++ svst1_ver_vnum_za16 (0, 1, p0, x1, 8), ++ svst1_ver_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_0, ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 0), ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_1, ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 1), ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w1[2-5]), w0 ++** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_7, ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 7), ++ svst1_ver_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** st1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w1[2-5]), w0, #?8 ++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0_8, ++ svst1_ver_vnum_za16 (1, w0, p0, x1, 8), ++ svst1_ver_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** st1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0_13, ++ svst1_ver_vnum_za16 (1, w0, p0, x1, 13), ++ svst1_ver_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za16_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1h { za0v\.h\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_0_w0_x2, ++ svst1_ver_vnum_za16 (0, w0, p0, x1, x2), ++ 
svst1_ver_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za16_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za16_1_w0p1_0, ++ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c +new file mode 100644 +index 000000000..45db67a9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_3_0_1, ++ svst1_ver_vnum_za32 (3, 0, p0, x1, 1), ++ svst1_ver_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1w { za2v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_2_1_1, ++ svst1_ver_vnum_za32 (2, 1, p0, x1, 1), ++ svst1_ver_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?4 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_0_4, ++ svst1_ver_vnum_za32 (0, 0, p0, x1, 4), ++ svst1_ver_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w1[2-5]), #?5 ++** st1w { za2v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_2_1_4, ++ svst1_ver_vnum_za32 (2, 1, p0, x1, 4), ++ svst1_ver_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_0, ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 0), ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_1, ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 1), ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_3, ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 3), ++ svst1_ver_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** st1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w1[2-5]), w0, #?4 ++** st1w { za1v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_1_w0_4, ++ svst1_ver_vnum_za32 (1, w0, p0, x1, 4), ++ svst1_ver_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** st1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_3_w0_13, ++ svst1_ver_vnum_za32 (3, w0, p0, x1, 13), ++ svst1_ver_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za32_0_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1w { za0v\.s\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_0_w0_x2, ++ svst1_ver_vnum_za32 (0, w0, p0, x1, x2), ++ svst1_ver_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za32_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1w { za1v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0, ++ svst1_ver_vnum_za32 (1, w0 + 1, 
p0, x1, 0), ++ svst1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c +new file mode 100644 +index 000000000..bd061fc61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1d { za3v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_3_0_1, ++ svst1_ver_vnum_za64 (3, 0, p0, x1, 1), ++ svst1_ver_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_7_1_1, ++ svst1_ver_vnum_za64 (7, 1, p0, x1, 1), ++ svst1_ver_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?2 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_0_2, ++ svst1_ver_vnum_za64 (0, 0, p0, x1, 2), ++ svst1_ver_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w1[2-5]), #?3 ++** st1d { za5v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_5_1_2, ++ svst1_ver_vnum_za64 (5, 1, p0, x1, 2), ++ svst1_ver_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_w0_0, ++ svst1_ver_vnum_za64 (0, w0, p0, x1, 0), ++ svst1_ver_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_0_w0_1, ++ svst1_ver_vnum_za64 (0, w0, p0, x1, 1), ++ svst1_ver_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w1[2-5]), w0, #?2 ++** st1d { za6v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_6_w0_2, ++ svst1_ver_vnum_za64 (6, w0, p0, x1, 2), ++ svst1_ver_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** st1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w1[2-5]), w0, #?13 ++** st1d { za2v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_2_w0_13, ++ svst1_ver_vnum_za64 (2, w0, p0, x1, 13), ++ svst1_ver_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** st1_vnum_za64_4_w0_x2: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1d { za4v\.d\[\3, 0\] }, p0, \[\2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_4_w0_x2, ++ svst1_ver_vnum_za64 (4, w0, p0, x1, x2), ++ svst1_ver_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za64_1_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1d { za1v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0, ++ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c +new file mode 100644 +index 000000000..b15a7eb08 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_vnum_za8_0_0_1: ++** 
incb x1 ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_0_1, ++ svst1_ver_vnum_za8 (0, 0, p0, x1, 1), ++ svst1_ver_vnum_za8 (0, 0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w1[2-5]), #?1 ++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_1_1, ++ svst1_ver_vnum_za8 (0, 1, p0, x1, 1), ++ svst1_ver_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?16 ++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_0_16, ++ svst1_ver_vnum_za8 (0, 0, p0, x1, 16), ++ svst1_ver_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w1[2-5]), #?17 ++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_1_16, ++ svst1_ver_vnum_za8 (0, 1, p0, x1, 16), ++ svst1_ver_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_w0_0: ++** mov (w1[2-5]), w0 ++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_0, ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 0), ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** st1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w1[2-5]), w0 ++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_1, ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 1), ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** st1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w1[2-5]), w0 ++** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_15, ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 15), ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** st1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_16, ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 16), ++ svst1_ver_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** st1_vnum_za8_0_w0_x2: ++** cntb (x[0-9]+) ++** mul (x[0-9]+), (?:\1, x2|x2, \1) ++** add (w1[2-5]), (?:w0, w2|w2, w0) ++** st1b { za0v\.b\[\3, 0\] }, p0, \[x1, \2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0_x2, ++ svst1_ver_vnum_za8 (0, w0, p0, x1, x2), ++ svst1_ver_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** st1_vnum_za8_0_w0p1_0: ++** mov (w1[2-5]), w0 ++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0, ++ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c +new file mode 100644 +index 000000000..7be6d5a5f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za128_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_0, ++ svst1_ver_za128 (0, 0, p0, x1), ++ svst1_ver_za128 (0, 0, p0, x1)) ++ ++/* ++** st1_za128_0_1: ++** mov (w1[2-5]), #?1 ++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_1, ++ svst1_ver_za128 (0, 1, p0, x1), ++ svst1_ver_za128 (0, 1, p0, x1)) ++ ++/* ++** st1_za128_0_w0: ++** mov (w1[2-5]), w0 ++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_w0, ++ 
svst1_ver_za128 (0, w0, p0, x1), ++ svst1_ver_za128 (0, w0, p0, x1)) ++ ++/* ++** st1_za128_0_w0_p1: ++** add (w1[2-5]), w0, #?1 ++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_0_w0_p1, ++ svst1_ver_za128 (0, w0 + 1, p0, x1), ++ svst1_ver_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za128_7_w0: ++** mov (w1[2-5]), w0 ++** st1q { za7v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_7_w0, ++ svst1_ver_za128 (7, w0, p0, x1), ++ svst1_ver_za128 (7, w0, p0, x1)) ++ ++/* ++** st1_za128_13_w0: ++** mov (w1[2-5]), w0 ++** st1q { za13v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_13_w0, ++ svst1_ver_za128 (13, w0, p0, x1), ++ svst1_ver_za128 (13, w0, p0, x1)) ++ ++/* ++** st1_za128_15_w0: ++** mov (w1[2-5]), w0 ++** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_15_w0, ++ svst1_ver_za128 (15, w0, p0, x1), ++ svst1_ver_za128 (15, w0, p0, x1)) ++ ++/* ++** st1_za128_9_w0_index: ++** mov (w1[2-5]), w0 ++** st1q { za9v\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za128_9_w0_index, ++ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16), ++ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c +new file mode 100644 +index 000000000..1bbf12a14 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za16_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za16_0_0, ++ svst1_ver_za16 (0, 0, p0, x1), ++ svst1_ver_za16 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 7. 
*/
++/*
++** st1_za16_0_7:
++** mov (w1[2-5]), #?7
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_7,
++ svst1_ver_za16 (0, 7, p0, x1),
++ svst1_ver_za16 (0, 7, p0, x1))
++
++/*
++** st1_za16_0_8:
++** mov (w1[2-5]), #?8
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_8,
++ svst1_ver_za16 (0, 8, p0, x1),
++ svst1_ver_za16 (0, 8, p0, x1))
++
++/*
++** st1_za16_0_w0:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0,
++ svst1_ver_za16 (0, w0, p0, x1),
++ svst1_ver_za16 (0, w0, p0, x1))
++
++/*
++** st1_za16_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p1,
++ svst1_ver_za16 (0, w0 + 1, p0, x1),
++ svst1_ver_za16 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za16_0_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p7,
++ svst1_ver_za16 (0, w0 + 7, p0, x1),
++ svst1_ver_za16 (0, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0,
++ svst1_ver_za16 (1, w0, p0, x1),
++ svst1_ver_za16 (1, w0, p0, x1))
++
++
++/*
++** st1_za16_1_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p1,
++ svst1_ver_za16 (1, w0 + 1, p0, x1),
++ svst1_ver_za16 (1, w0 + 1, p0, x1))
++
++/*
++** st1_za16_1_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p7,
++ svst1_ver_za16 (1, w0 + 7, p0, x1),
++ svst1_ver_za16 (1, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0_p5_index:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p5_index,
++ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** st1_za16_0_w0_p8:
++** add (w1[2-5]), w0, #?8
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p8,
++ svst1_ver_za16 (0, w0 + 8, p0, x1),
++ svst1_ver_za16 (0, w0 + 8, p0, x1))
++
++/*
++** st1_za16_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_m1,
++ svst1_ver_za16 (0, w0 - 1, p0, x1),
++ svst1_ver_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c
+new file mode 100644
+index 000000000..9809e9708
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za32_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_0,
++ svst1_ver_za32 (0, 0, p0, x1),
++ svst1_ver_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 3.
*/ ++/* ++** st1_za32_0_3: ++** mov (w1[2-5]), #?3 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_3, ++ svst1_ver_za32 (0, 3, p0, x1), ++ svst1_ver_za32 (0, 3, p0, x1)) ++ ++/* ++** st1_za32_0_4: ++** mov (w1[2-5]), #?4 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_4, ++ svst1_ver_za32 (0, 4, p0, x1), ++ svst1_ver_za32 (0, 4, p0, x1)) ++ ++/* ++** st1_za32_0_w0: ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0, ++ svst1_ver_za32 (0, w0, p0, x1), ++ svst1_ver_za32 (0, w0, p0, x1)) ++ ++/* ++** st1_za32_0_w0_p1: ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p1, ++ svst1_ver_za32 (0, w0 + 1, p0, x1), ++ svst1_ver_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za32_0_w0_p3: ++** mov (w1[2-5]), w0 ++** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p3, ++ svst1_ver_za32 (0, w0 + 3, p0, x1), ++ svst1_ver_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** st1_za32_3_w0: ++** mov (w1[2-5]), w0 ++** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0, ++ svst1_ver_za32 (3, w0, p0, x1), ++ svst1_ver_za32 (3, w0, p0, x1)) ++ ++/* ++** st1_za32_3_w0_p1: ++** mov (w1[2-5]), w0 ++** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0_p1, ++ svst1_ver_za32 (3, w0 + 1, p0, x1), ++ svst1_ver_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** st1_za32_3_w0_p3: ++** mov (w1[2-5]), w0 ++** st1w { za3v\.s\[\1, 3\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_3_w0_p3, ++ svst1_ver_za32 (3, w0 + 3, p0, x1), ++ svst1_ver_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** st1_za32_1_w0_p2_index: ++** mov (w1[2-5]), w0 ++** st1w { za1v\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_1_w0_p2_index, ++ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** st1_za32_0_w0_p4: ++** add (w1[2-5]), w0, #?4 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_p4, ++ svst1_ver_za32 (0, w0 + 4, p0, x1), ++ svst1_ver_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** st1_za32_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za32_0_w0_m1, ++ svst1_ver_za32 (0, w0 - 1, p0, x1), ++ svst1_ver_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c +new file mode 100644 +index 000000000..0e93f4da3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za64_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_0, ++ svst1_ver_za64 (0, 0, p0, x1), ++ svst1_ver_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. 
*/ ++/* ++** st1_za64_0_1: ++** mov (w1[2-5]), #?1 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_1, ++ svst1_ver_za64 (0, 1, p0, x1), ++ svst1_ver_za64 (0, 1, p0, x1)) ++ ++/* ++** st1_za64_0_2: ++** mov (w1[2-5]), #?2 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_2, ++ svst1_ver_za64 (0, 2, p0, x1), ++ svst1_ver_za64 (0, 2, p0, x1)) ++ ++/* ++** st1_za64_0_w0: ++** mov (w1[2-5]), w0 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0, ++ svst1_ver_za64 (0, w0, p0, x1), ++ svst1_ver_za64 (0, w0, p0, x1)) ++ ++/* ++** st1_za64_0_w0_p1: ++** mov (w1[2-5]), w0 ++** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_p1, ++ svst1_ver_za64 (0, w0 + 1, p0, x1), ++ svst1_ver_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** st1_za64_7_w0: ++** mov (w1[2-5]), w0 ++** st1d { za7v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_7_w0, ++ svst1_ver_za64 (7, w0, p0, x1), ++ svst1_ver_za64 (7, w0, p0, x1)) ++ ++/* ++** st1_za64_7_w0_p1: ++** mov (w1[2-5]), w0 ++** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_7_w0_p1, ++ svst1_ver_za64 (7, w0 + 1, p0, x1), ++ svst1_ver_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** st1_za64_5_w0_p1_index: ++** mov (w1[2-5]), w0 ++** st1d { za5v\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_5_w0_p1_index, ++ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** st1_za64_0_w0_p2: ++** add (w1[2-5]), w0, #?2 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_p2, ++ svst1_ver_za64 (0, w0 + 2, p0, x1), ++ svst1_ver_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** st1_za64_0_w0_m1: ++** sub (w1[2-5]), w0, #?1 ++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za64_0_w0_m1, ++ svst1_ver_za64 (0, w0 - 1, p0, x1), ++ svst1_ver_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c +new file mode 100644 +index 000000000..c76b5c28b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** st1_za8_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] ++** ret ++*/ ++TEST_STORE_ZA (st1_za8_0_0, ++ svst1_ver_za8 (0, 0, p0, x1), ++ svst1_ver_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/
++/*
++** st1_za8_0_15:
++** mov (w1[2-5]), #?15
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_15,
++ svst1_ver_za8 (0, 15, p0, x1),
++ svst1_ver_za8 (0, 15, p0, x1))
++
++/*
++** st1_za8_0_16:
++** mov (w1[2-5]), #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_16,
++ svst1_ver_za8 (0, 16, p0, x1),
++ svst1_ver_za8 (0, 16, p0, x1))
++
++/*
++** st1_za8_0_w0:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0,
++ svst1_ver_za8 (0, w0, p0, x1),
++ svst1_ver_za8 (0, w0, p0, x1))
++
++/*
++** st1_za8_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p1,
++ svst1_ver_za8 (0, w0 + 1, p0, x1),
++ svst1_ver_za8 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za8_0_w0_p15:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p15,
++ svst1_ver_za8 (0, w0 + 15, p0, x1),
++ svst1_ver_za8 (0, w0 + 15, p0, x1))
++
++/*
++** st1_za8_0_w0_p13_index:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 13\] }, p0, \[x1, x2\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p13_index,
++ svst1_ver_za8 (0, w0 + 13, p0, x1 + x2),
++ svst1_ver_za8 (0, w0 + 13, p0, x1 + x2))
++
++/*
++** st1_za8_0_w0_p16:
++** add (w1[2-5]), w0, #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p16,
++ svst1_ver_za8 (0, w0 + 16, p0, x1),
++ svst1_ver_za8 (0, w0 + 16, p0, x1))
++
++/*
++** st1_za8_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_m1,
++ svst1_ver_za8 (0, w0 - 1, p0, x1),
++ svst1_ver_za8 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c
+new file mode 100644
+index 000000000..3ef7e0c09
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c
+@@ -0,0 +1,147 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** str_vnum_za_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_0,
++ svstr_vnum_za (0, x1, 0),
++ svstr_vnum_za (0, x1, 0))
++
++/*
++** str_vnum_za_0_1:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_1,
++ svstr_vnum_za (0, x1, 1),
++ svstr_vnum_za (0, x1, 1))
++
++/*
++** str_vnum_za_1_0:
++** mov (w1[2-5]), #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_0,
++ svstr_vnum_za (1, x1, 0),
++ svstr_vnum_za (1, x1, 0))
++
++/*
++** str_vnum_za_1_2:
++** mov (w1[2-5]), #?1
++** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_2,
++ svstr_vnum_za (1, x1, 2),
++ svstr_vnum_za (1, x1, 2))
++
++/*
++** str_vnum_za_w0_0:
++** mov (w1[2-5]), w0
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_0,
++ svstr_vnum_za (w0, x1, 0),
++ svstr_vnum_za (w0, x1, 0))
++
++/*
++** str_vnum_za_w0_1:
++** mov (w1[2-5]), w0
++** str za\[\1, 1\], \[x1, #1, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_1,
++ svstr_vnum_za (w0, x1, 1),
++ svstr_vnum_za (w0, x1, 1))
++
++/*
++** str_vnum_za_w0_13:
++** mov (w1[2-5]), w0
++** str za\[\1, 13\], \[x1, #13, mul vl\]
++** ret
++*/
++TEST_STORE_ZA
(str_vnum_za_w0_13, ++ svstr_vnum_za (w0, x1, 13), ++ svstr_vnum_za (w0, x1, 13)) ++ ++/* ++** str_vnum_za_w0_15: ++** mov (w1[2-5]), w0 ++** str za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_15, ++ svstr_vnum_za (w0, x1, 15), ++ svstr_vnum_za (w0, x1, 15)) ++ ++/* ++** str_vnum_za_w0_16: ++** ( ++** add (w1[2-5]), w0, #?16 ++** incb x1, all, mul #16 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_16, ++ svstr_vnum_za (w0, x1, 16), ++ svstr_vnum_za (w0, x1, 16)) ++ ++/* ++** str_vnum_za_w0_m1: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** decb x1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** decb x1 ++** sub (w1[2-5]), w0, #?1 ++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_m1, ++ svstr_vnum_za (w0, x1, -1), ++ svstr_vnum_za (w0, x1, -1)) ++ ++/* ++** str_vnum_za_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0p1_0, ++ svstr_vnum_za (w0 + 1, x1, 0), ++ svstr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** str_vnum_za_w0m1_1: ++** sub (w1[2-5]), w0, #?1 ++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0m1_1, ++ svstr_vnum_za (w0 - 1, x1, 1), ++ svstr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** str_vnum_za_w0p2_3: ++** add (w1[2-5]), w0, #?2 ++** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0p2_3, ++ svstr_vnum_za (w0 + 2, x1, 3), ++ svstr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c +new file mode 100644 +index 000000000..7cd09e67c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c +@@ -0,0 +1,148 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** str_vnum_za_0_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_0_0, ++ svstr_vnum_za (0, x1, 0), ++ svstr_vnum_za (0, x1, 0)) ++ ++/* ++** str_vnum_za_0_1: ++** mov (w1[2-5]), (?:wzr|#?0) ++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_0_1, ++ svstr_vnum_za (0, x1, 1), ++ svstr_vnum_za (0, x1, 1)) ++ ++/* ++** str_vnum_za_1_0: ++** mov (w1[2-5]), #?1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_1_0, ++ svstr_vnum_za (1, x1, 0), ++ svstr_vnum_za (1, x1, 0)) ++ ++/* ++** str_vnum_za_1_2: ++** mov (w1[2-5]), #?1 ++** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_1_2, ++ svstr_vnum_za (1, x1, 2), ++ svstr_vnum_za (1, x1, 2)) ++ ++/* ++** str_vnum_za_w0_0: ++** mov (w1[2-5]), w0 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_0, ++ svstr_vnum_za (w0, x1, 0), ++ svstr_vnum_za (w0, x1, 0)) ++ ++/* ++** str_vnum_za_w0_1: ++** mov (w1[2-5]), w0 ++** str za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_1, ++ svstr_vnum_za (w0, x1, 1), ++ svstr_vnum_za (w0, x1, 1)) ++ ++/* ++** str_vnum_za_w0_13: ++** mov (w1[2-5]), w0 ++** str za\[\1, 13\], \[x1, #13, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_13, ++ svstr_vnum_za (w0, x1, 13), ++ svstr_vnum_za 
(w0, x1, 13)) ++ ++/* ++** str_vnum_za_w0_15: ++** mov (w1[2-5]), w0 ++** str za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_15, ++ svstr_vnum_za (w0, x1, 15), ++ svstr_vnum_za (w0, x1, 15)) ++ ++/* ++** str_vnum_za_w0_16: ++** ( ++** add (w1[2-5]), w0, #?16 ++** addsvl (x[0-9]+), x1, #16 ++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** addsvl (x[0-9]+), x1, #16 ++** add (w1[2-5]), w0, #?16 ++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_16, ++ svstr_vnum_za (w0, x1, 16), ++ svstr_vnum_za (w0, x1, 16)) ++ ++/* ++** str_vnum_za_w0_m1: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** addsvl (x[0-9]+), x1, #-1 ++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** addsvl (x[0-9]+), x1, #-1 ++** sub (w1[2-5]), w0, #?1 ++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0_m1, ++ svstr_vnum_za (w0, x1, -1), ++ svstr_vnum_za (w0, x1, -1)) ++ ++/* ++** str_vnum_za_w0p1_0: ++** add (w1[2-5]), w0, #?1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0p1_0, ++ svstr_vnum_za (w0 + 1, x1, 0), ++ svstr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** str_vnum_za_w0m1_1: ++** sub (w1[2-5]), w0, #?1 ++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0m1_1, ++ svstr_vnum_za (w0 - 1, x1, 1), ++ svstr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** str_vnum_za_w0p2_3: ++** add (w1[2-5]), w0, #?2 ++** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_vnum_za_w0p2_3, ++ svstr_vnum_za (w0 + 2, x1, 3), ++ svstr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c +new file mode 100644 +index 000000000..4d953c596 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c +@@ -0,0 +1,124 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** str_za_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_0, ++ svstr_za (0, x1), ++ svstr_za (0, x1)) ++ ++/* ++** str_za_1: ++** mov (w1[2-5]), #?1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_1, ++ svstr_za (1, x1), ++ svstr_za (1, x1)) ++ ++/* ++** str_za_w0: ++** mov (w1[2-5]), w0 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0, ++ svstr_za (w0, x1), ++ svstr_za (w0, x1)) ++ ++/* ++** str_za_w0_1_vnum: ++** mov (w1[2-5]), w0 ++** str za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0_1_vnum, ++ svstr_za (w0 + 1, x1 + svcntsb ()), ++ svstr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** str_za_w0_13_vnum: ++** mov (w1[2-5]), w0 ++** str za\[\1, 13\], \[x1, #13, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0_13_vnum, ++ svstr_za (w0 + 13, x1 + svcntsb () * 13), ++ svstr_za (w0 + 13, x1 + svcntsb () * 13)) ++ ++/* ++** str_za_w0_15_vnum: ++** mov (w1[2-5]), w0 ++** str za\[\1, 15\], \[x1, #15, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0_15_vnum, ++ svstr_za (w0 + 15, x1 + svcntsb () * 15), ++ svstr_za (w0 + 15, x1 + svcntsb () * 15)) ++ ++/* ++** str_za_w0_16_vnum: ++** ( ++** add (w1[2-5]), w0, #?16 ++** incb x1, all, mul #16 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** incb x1, all, mul #16 ++** add (w1[2-5]), w0, #?16 ++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ 
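++/* Explanatory note (not in the upstream test): "str za" has a single
++   4-bit immediate that serves as both the ZA slice offset and the
++   vector-length multiple in the address, so only offsets in [0, 15]
++   can be folded into one instruction.  The w0_16 and w0_m1 tests
++   below therefore expect the excess to be added to both the base
++   register and the 32-bit slice index, in either order (hence the
++   alternation in the patterns).  Streaming functions can use
++   incb/decb for the base adjustment; the streaming-compatible
++   str_vnum_za tests above use addsvl instead.  */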
++TEST_STORE_ZA (str_za_w0_16_vnum, ++ svstr_za (w0 + 16, x1 + svcntsb () * 16), ++ svstr_za (w0 + 16, x1 + svcntsb () * 16)) ++ ++/* ++** str_za_w0_m1_vnum: ++** ( ++** sub (w1[2-5]), w0, #?1 ++** decb x1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** | ++** decb x1 ++** sub (w1[2-5]), w0, #?1 ++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0_m1_vnum, ++ svstr_za (w0 - 1, x1 - svcntsb ()), ++ svstr_za (w0 - 1, x1 - svcntsb ())) ++ ++/* ++** str_za_w0p2: ++** add (w1[2-5]), w0, #?2 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0p2, ++ svstr_za (w0 + 2, x1), ++ svstr_za (w0 + 2, x1)) ++ ++/* ++** str_za_offset: ++** ( ++** mov (w1[2-5]), w0 ++** add (x[0-9]+), x1, #?1 ++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** add (x[0-9]+), x1, #?1 ++** mov (w1[2-5]), w0 ++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_za_offset, ++ svstr_za (w0, x1 + 1), ++ svstr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c +new file mode 100644 +index 000000000..3406055e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** str_za_0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_0, ++ svstr_za (0, x1), ++ svstr_za (0, x1)) ++ ++/* ++** str_za_1: ++** mov (w1[2-5]), #?1 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_1, ++ svstr_za (1, x1), ++ svstr_za (1, x1)) ++ ++/* ++** str_za_w0: ++** mov (w1[2-5]), w0 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0, ++ svstr_za (w0, x1), ++ svstr_za (w0, x1)) ++ ++/* ++** str_za_w0_1_vnum: ++** mov (w1[2-5]), w0 ++** str za\[\1, 1\], \[x1, #1, mul vl\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0_1_vnum, ++ svstr_za (w0 + 1, x1 + svcntsb ()), ++ svstr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** str_za_w0p2: ++** add (w1[2-5]), w0, #?2 ++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ++** ret ++*/ ++TEST_STORE_ZA (str_za_w0p2, ++ svstr_za (w0 + 2, x1), ++ svstr_za (w0 + 2, x1)) ++ ++/* ++** str_za_offset: ++** ( ++** mov (w1[2-5]), w0 ++** add (x[0-9]+), x1, #?1 ++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] ++** | ++** add (x[0-9]+), x1, #?1 ++** mov (w1[2-5]), w0 ++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] ++** ) ++** ret ++*/ ++TEST_STORE_ZA (str_za_offset, ++ svstr_za (w0, x1 + 1), ++ svstr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c +new file mode 100644 +index 000000000..9dd66f722 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** sumopa_za32_s8_0_p0_p1_z0_z4: ++** sumopa za0\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t, ++ svsumopa_za32_s8_m (0, p0, p1, z0, z4), ++ svsumopa_za32_m (0, p0, p1, z0, z4)) ++ ++/* ++** sumopa_za32_s8_0_p1_p0_z4_z0: ++** sumopa za0\.s, p1/m, p0/m, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t, ++ 
svsumopa_za32_s8_m (0, p1, p0, z4, z0), ++ svsumopa_za32_m (0, p1, p0, z4, z0)) ++ ++/* ++** sumopa_za32_s8_3_p0_p1_z0_z4: ++** sumopa za3\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t, ++ svsumopa_za32_s8_m (3, p0, p1, z0, z4), ++ svsumopa_za32_m (3, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c +new file mode 100644 +index 000000000..2a78ab85d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** sumopa_za64_s16_0_p0_p1_z0_z4: ++** sumopa za0\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t, ++ svsumopa_za64_s16_m (0, p0, p1, z0, z4), ++ svsumopa_za64_m (0, p0, p1, z0, z4)) ++ ++/* ++** sumopa_za64_s16_0_p1_p0_z4_z0: ++** sumopa za0\.d, p1/m, p0/m, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t, ++ svsumopa_za64_s16_m (0, p1, p0, z4, z0), ++ svsumopa_za64_m (0, p1, p0, z4, z0)) ++ ++/* ++** sumopa_za64_s16_7_p0_p1_z0_z4: ++** sumopa za7\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumopa_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t, ++ svsumopa_za64_s16_m (7, p0, p1, z0, z4), ++ svsumopa_za64_m (7, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c +new file mode 100644 +index 000000000..55cb92d1b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** sumops_za32_s8_0_p0_p1_z0_z4: ++** sumops za0\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t, ++ svsumops_za32_s8_m (0, p0, p1, z0, z4), ++ svsumops_za32_m (0, p0, p1, z0, z4)) ++ ++/* ++** sumops_za32_s8_0_p1_p0_z4_z0: ++** sumops za0\.s, p1/m, p0/m, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t, ++ svsumops_za32_s8_m (0, p1, p0, z4, z0), ++ svsumops_za32_m (0, p1, p0, z4, z0)) ++ ++/* ++** sumops_za32_s8_3_p0_p1_z0_z4: ++** sumops za3\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t, ++ svsumops_za32_s8_m (3, p0, p1, z0, z4), ++ svsumops_za32_m (3, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c +new file mode 100644 +index 000000000..910a45b29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** sumops_za64_s16_0_p0_p1_z0_z4: ++** sumops za0\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t, ++ svsumops_za64_s16_m (0, p0, p1, z0, z4), ++ svsumops_za64_m (0, p0, p1, z0, z4)) ++ ++/* ++** sumops_za64_s16_0_p1_p0_z4_z0: ++** sumops za0\.d, p1/m, p0/m, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t, ++ svsumops_za64_s16_m (0, 
p1, p0, z4, z0), ++ svsumops_za64_m (0, p1, p0, z4, z0)) ++ ++/* ++** sumops_za64_s16_7_p0_p1_z0_z4: ++** sumops za7\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (sumops_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t, ++ svsumops_za64_s16_m (7, p0, p1, z0, z4), ++ svsumops_za64_m (7, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h +new file mode 100644 +index 000000000..aaadab2f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h +@@ -0,0 +1,62 @@ ++#ifndef TEST_SME_ACLE_H ++#define TEST_SME_ACLE_H 1 ++ ++#if (!defined(STREAMING_COMPATIBLE) \ ++ && !defined(NON_STREAMING) \ ++ && !defined(STREAMING)) ++#define STREAMING ++#endif ++ ++#if !defined(NO_SHARED_ZA) ++#define SHARED_ZA ++#endif ++ ++#include "../../sve/acle/asm/test_sve_acle.h" ++ ++#include <arm_sme.h> ++ ++#define TEST_LOAD_ZA(NAME, CODE1, CODE2) \ ++ PROTO (NAME, void, (svbool_t p0, int32_t w0, const char *x1, \ ++ uint64_t x2)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_STORE_ZA(NAME, CODE1, CODE2) \ ++ PROTO (NAME, void, (svbool_t p0, int32_t w0, char *x1, \ ++ uint64_t x2)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \ ++ int32_t w0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_WRITE_ZA(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \ ++ int32_t w0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \ ++ svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_DUAL_ZA(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, void, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \ ++ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c +new file mode 100644 +index 000000000..5474328fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** undef_za_1: ++** ret ++*/ ++PROTO (undef_za_1, void, ()) { svundef_za (); } ++ ++/* ++** undef_za_2: ++** ret ++*/ ++PROTO (undef_za_2, void, ()) ++{ ++ svzero_za (); ++ svundef_za (); ++} ++ ++/* ++** undef_za_3: ++** mov (w1[2-5]), (?:wzr|#?0) ++** str za\[\1, 0\], \[x0(?:, #0, mul vl)\] ++** ret ++*/ ++PROTO (undef_za_3, void, (void *ptr)) ++{ ++ svzero_za (); ++ svundef_za (); ++ svstr_za (0, ptr); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c +new file mode 100644 +index 000000000..bbc0b6c11 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** usmopa_za32_u8_0_p0_p1_z0_z4: ++** usmopa za0\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t, ++ svusmopa_za32_u8_m (0, p0, p1, z0, z4), ++ svusmopa_za32_m (0, p0, p1, z0, z4)) ++ ++/* ++** 
usmopa_za32_u8_0_p1_p0_z4_z0: ++** usmopa za0\.s, p1/m, p0/m, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t, ++ svusmopa_za32_u8_m (0, p1, p0, z4, z0), ++ svusmopa_za32_m (0, p1, p0, z4, z0)) ++ ++/* ++** usmopa_za32_u8_3_p0_p1_z0_z4: ++** usmopa za3\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t, ++ svusmopa_za32_u8_m (3, p0, p1, z0, z4), ++ svusmopa_za32_m (3, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c +new file mode 100644 +index 000000000..64ee25bc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** usmopa_za64_u16_0_p0_p1_z0_z4: ++** usmopa za0\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t, ++ svusmopa_za64_u16_m (0, p0, p1, z0, z4), ++ svusmopa_za64_m (0, p0, p1, z0, z4)) ++ ++/* ++** usmopa_za64_u16_0_p1_p0_z4_z0: ++** usmopa za0\.d, p1/m, p0/m, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t, ++ svusmopa_za64_u16_m (0, p1, p0, z4, z0), ++ svusmopa_za64_m (0, p1, p0, z4, z0)) ++ ++/* ++** usmopa_za64_u16_7_p0_p1_z0_z4: ++** usmopa za7\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmopa_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t, ++ svusmopa_za64_u16_m (7, p0, p1, z0, z4), ++ svusmopa_za64_m (7, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c +new file mode 100644 +index 000000000..98fd33157 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** usmops_za32_u8_0_p0_p1_z0_z4: ++** usmops za0\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t, ++ svusmops_za32_u8_m (0, p0, p1, z0, z4), ++ svusmops_za32_m (0, p0, p1, z0, z4)) ++ ++/* ++** usmops_za32_u8_0_p1_p0_z4_z0: ++** usmops za0\.s, p1/m, p0/m, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t, ++ svusmops_za32_u8_m (0, p1, p0, z4, z0), ++ svusmops_za32_m (0, p1, p0, z4, z0)) ++ ++/* ++** usmops_za32_u8_3_p0_p1_z0_z4: ++** usmops za3\.s, p0/m, p1/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t, ++ svusmops_za32_u8_m (3, p0, p1, z0, z4), ++ svusmops_za32_m (3, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c +new file mode 100644 +index 000000000..e20cdab41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** usmops_za64_u16_0_p0_p1_z0_z4: ++** usmops za0\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t, ++ svusmops_za64_u16_m (0, p0, p1, z0, z4), ++ svusmops_za64_m (0, p0, p1, z0, z4)) ++ ++/* ++** 
usmops_za64_u16_0_p1_p0_z4_z0: ++** usmops za0\.d, p1/m, p0/m, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t, ++ svusmops_za64_u16_m (0, p1, p0, z4, z0), ++ svusmops_za64_m (0, p1, p0, z4, z0)) ++ ++/* ++** usmops_za64_u16_7_p0_p1_z0_z4: ++** usmops za7\.d, p0/m, p1/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_ZA (usmops_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t, ++ svusmops_za64_u16_m (7, p0, p1, z0, z4), ++ svusmops_za64_m (7, p0, p1, z0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c +new file mode 100644 +index 000000000..119a2535e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c +@@ -0,0 +1,193 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za128_s8_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t, ++ svwrite_hor_za128_s8_m (0, 0, p0, z0), ++ svwrite_hor_za128_m (0, 0, p0, z0)) ++ ++/* ++** write_za128_s8_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t, ++ svwrite_hor_za128_s8_m (0, 1, p0, z0), ++ svwrite_hor_za128_m (0, 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t, ++ svwrite_hor_za128_s8_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0p1_z0: ++** add (w1[2-5]), w0, #?1 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t, ++ svwrite_hor_za128_s8_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za128_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t, ++ svwrite_hor_za128_s8_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za128_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za128_s8_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t, ++ svwrite_hor_za128_s8_m (1, w0, p0, z0), ++ svwrite_hor_za128_m (1, w0, p0, z0)) ++ ++/* ++** write_za128_s8_15_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za15h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t, ++ svwrite_hor_za128_s8_m (15, w0, p0, z0), ++ svwrite_hor_za128_m (15, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z1\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t, ++ svwrite_hor_za128_s8_m (0, w0, p0, z1), ++ svwrite_hor_za128_m (0, w0, p0, z1)) ++ ++/* ++** write_za128_u8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, ++ svwrite_hor_za128_u8_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t, ++ svwrite_hor_za128_s16_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ 
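++/* Explanatory note (not in the upstream test): 128-bit (.q) slices
++   allow no nonzero immediate offset, so the w0 + 1 and w0 - 1 cases
++   above use explicit add/sub instructions rather than folding the
++   constant into the MOVA slice offset.  Every element type maps onto
++   the same zaNh.q form; the remaining tests differ only in the
++   vector type passed to the intrinsic.  */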
++TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t, ++ svwrite_hor_za128_u16_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t, ++ svwrite_hor_za128_f16_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_bf16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_hor_za128_bf16_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s32_0_w0_z0, svint32_t, ++ svwrite_hor_za128_s32_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t, ++ svwrite_hor_za128_u32_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t, ++ svwrite_hor_za128_f32_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s64_0_w0_z0, svint64_t, ++ svwrite_hor_za128_s64_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t, ++ svwrite_hor_za128_u64_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t, ++ svwrite_hor_za128_f64_m (0, w0, p0, z0), ++ svwrite_hor_za128_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c +new file mode 100644 +index 000000000..c8f13f7bc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za16_s16_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, 0, p0, z0), ++ svwrite_hor_za16_m (0, 0, p0, z0)) ++ ++/* ++** write_za16_s16_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, 1, p0, z0), ++ svwrite_hor_za16_m (0, 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0, p0, z0), ++ svwrite_hor_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 1\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za16_m (0, 
w0 + 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p7_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 7\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0 + 7, p0, z0), ++ svwrite_hor_za16_m (0, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p8_z0: ++** add (w1[2-5]), w0, #?8 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0 + 8, p0, z0), ++ svwrite_hor_za16_m (0, w0 + 8, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za16_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t, ++ svwrite_hor_za16_s16_m (1, w0, p0, z0), ++ svwrite_hor_za16_m (1, w0, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0p7_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.h\[\1, 7\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t, ++ svwrite_hor_za16_s16_m (1, w0 + 7, p0, z0), ++ svwrite_hor_za16_m (1, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 0\], p0/m, z1\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t, ++ svwrite_hor_za16_s16_m (0, w0, p0, z1), ++ svwrite_hor_za16_m (0, w0, p0, z1)) ++ ++/* ++** write_za16_u16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t, ++ svwrite_hor_za16_u16_m (0, w0, p0, z0), ++ svwrite_hor_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_f16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_f16_0_w0_z0, svfloat16_t, ++ svwrite_hor_za16_f16_m (0, w0, p0, z0), ++ svwrite_hor_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_bf16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_hor_za16_bf16_m (0, w0, p0, z0), ++ svwrite_hor_za16_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c +new file mode 100644 +index 000000000..ea2f5ae89 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c +@@ -0,0 +1,143 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za32_s32_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, 0, p0, z0), ++ svwrite_hor_za32_m (0, 0, p0, z0)) ++ ++/* ++** write_za32_s32_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, 1, p0, z0), ++ svwrite_hor_za32_m (0, 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0, p0, z0), ++ svwrite_hor_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 1\], p0/m, 
z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za32_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 + 3, p0, z0), ++ svwrite_hor_za32_m (0, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p4_z0: ++** add (w1[2-5]), w0, #?4 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 + 4, p0, z0), ++ svwrite_hor_za32_m (0, w0 + 4, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za32_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (1, w0, p0, z0), ++ svwrite_hor_za32_m (1, w0, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, ++ svwrite_hor_za32_s32_m (1, w0 + 3, p0, z0), ++ svwrite_hor_za32_m (1, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za3h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (3, w0, p0, z0), ++ svwrite_hor_za32_m (3, w0, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za3h\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, ++ svwrite_hor_za32_s32_m (3, w0 + 3, p0, z0), ++ svwrite_hor_za32_m (3, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 0\], p0/m, z1\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0, p0, z1), ++ svwrite_hor_za32_m (0, w0, p0, z1)) ++ ++/* ++** write_za32_u32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, ++ svwrite_hor_za32_u32_m (0, w0, p0, z0), ++ svwrite_hor_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_f32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, ++ svwrite_hor_za32_f32_m (0, w0, p0, z0), ++ svwrite_hor_za32_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c +new file mode 100644 +index 000000000..2b0a157d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za64_s64_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, 0, p0, z0), ++ svwrite_hor_za64_m (0, 0, p0, z0)) ++ ++/* ++** write_za64_s64_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, ++ 
svwrite_hor_za64_s64_m (0, 1, p0, z0), ++ svwrite_hor_za64_m (0, 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p2_z0: ++** add (w1[2-5]), w0, #?2 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 + 2, p0, z0), ++ svwrite_hor_za64_m (0, w0 + 2, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za64_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (1, w0, p0, z0), ++ svwrite_hor_za64_m (1, w0, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za1h\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (1, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (1, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za7h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (7, w0, p0, z0), ++ svwrite_hor_za64_m (7, w0, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za7h\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (7, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (7, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0h\.d\[\1, 0\], p0/m, z1\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0, p0, z1), ++ svwrite_hor_za64_m (0, w0, p0, z1)) ++ ++/* ++** write_za64_u64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, ++ svwrite_hor_za64_u64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_f64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, ++ svwrite_hor_za64_f64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c +new file mode 100644 +index 000000000..683e1a64a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za8_s8_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, 0, p0, z0), ++ svwrite_hor_za8_m (0, 0, p0, z0)) ++ ++/* ++** write_za8_s8_0_1_z0: ++** 
mov (w1[2-5]), #?1 ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, 1, p0, z0), ++ svwrite_hor_za8_m (0, 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0, p0, z0), ++ svwrite_hor_za8_m (0, w0, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.b\[\1, 1\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p15_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.b\[\1, 15\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 + 15, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 15, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p16_z0: ++** add (w1[2-5]), w0, #?16 ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 + 16, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 16, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za8_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0h\.b\[\1, 0\], p0/m, z1\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0, p0, z1), ++ svwrite_hor_za8_m (0, w0, p0, z1)) ++ ++/* ++** write_za8_u8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0h\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, ++ svwrite_hor_za8_u8_m (0, w0, p0, z0), ++ svwrite_hor_za8_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c +new file mode 100644 +index 000000000..9622e99dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c +@@ -0,0 +1,193 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za128_s8_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, 0, p0, z0), ++ svwrite_ver_za128_m (0, 0, p0, z0)) ++ ++/* ++** write_za128_s8_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, 1, p0, z0), ++ svwrite_ver_za128_m (0, 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0p1_z0: ++** add (w1[2-5]), w0, #?1 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za128_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t, ++ 
svwrite_ver_za128_s8_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za128_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za128_s8_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (1, w0, p0, z0), ++ svwrite_ver_za128_m (1, w0, p0, z0)) ++ ++/* ++** write_za128_s8_15_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za15v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (15, w0, p0, z0), ++ svwrite_ver_za128_m (15, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z1\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0, p0, z1), ++ svwrite_ver_za128_m (0, w0, p0, z1)) ++ ++/* ++** write_za128_u8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, ++ svwrite_ver_za128_u8_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t, ++ svwrite_ver_za128_s16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t, ++ svwrite_ver_za128_u16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t, ++ svwrite_ver_za128_f16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_bf16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_ver_za128_bf16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s32_0_w0_z0, svint32_t, ++ svwrite_ver_za128_s32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t, ++ svwrite_ver_za128_u32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t, ++ svwrite_ver_za128_f32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s64_0_w0_z0, svint64_t, ++ svwrite_ver_za128_s64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t, ++ svwrite_ver_za128_u64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.q\[\1, 0\], p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t, ++ 
svwrite_ver_za128_f64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c +new file mode 100644 +index 000000000..5430f2307 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za16_s16_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, 0, p0, z0), ++ svwrite_ver_za16_m (0, 0, p0, z0)) ++ ++/* ++** write_za16_s16_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, 1, p0, z0), ++ svwrite_ver_za16_m (0, 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 1\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p7_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 7\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 7, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p8_z0: ++** add (w1[2-5]), w0, #?8 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 8, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 8, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za16_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (1, w0, p0, z0), ++ svwrite_ver_za16_m (1, w0, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0p7_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.h\[\1, 7\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t, ++ svwrite_ver_za16_s16_m (1, w0 + 7, p0, z0), ++ svwrite_ver_za16_m (1, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 0\], p0/m, z1\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0, p0, z1), ++ svwrite_ver_za16_m (0, w0, p0, z1)) ++ ++/* ++** write_za16_u16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t, ++ svwrite_ver_za16_u16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_f16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_f16_0_w0_z0, svfloat16_t, ++ svwrite_ver_za16_f16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** 
write_za16_bf16_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.h\[\1, 0\], p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_ver_za16_bf16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c +new file mode 100644 +index 000000000..960ce163d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c +@@ -0,0 +1,143 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za32_s32_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, 0, p0, z0), ++ svwrite_ver_za32_m (0, 0, p0, z0)) ++ ++/* ++** write_za32_s32_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, 1, p0, z0), ++ svwrite_ver_za32_m (0, 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 1\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p4_z0: ++** add (w1[2-5]), w0, #?4 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 4, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 4, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za32_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (1, w0, p0, z0), ++ svwrite_ver_za32_m (1, w0, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (1, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (1, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za3v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (3, w0, p0, z0), ++ svwrite_ver_za32_m (3, w0, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0p3_z0: ++** mov (w1[2-5]), w0 ++** mova za3v\.s\[\1, 3\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (3, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (3, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 0\], p0/m, z1\.s ++** 
ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0, p0, z1), ++ svwrite_ver_za32_m (0, w0, p0, z1)) ++ ++/* ++** write_za32_u32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, ++ svwrite_ver_za32_u32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_f32_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.s\[\1, 0\], p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, ++ svwrite_ver_za32_f32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c +new file mode 100644 +index 000000000..962c4002e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za64_s64_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, 0, p0, z0), ++ svwrite_ver_za64_m (0, 0, p0, z0)) ++ ++/* ++** write_za64_s64_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, 1, p0, z0), ++ svwrite_ver_za64_m (0, 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za64_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p2_z0: ++** add (w1[2-5]), w0, #?2 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 + 2, p0, z0), ++ svwrite_ver_za64_m (0, w0 + 2, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za64_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (1, w0, p0, z0), ++ svwrite_ver_za64_m (1, w0, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za1v\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (1, w0 + 1, p0, z0), ++ svwrite_ver_za64_m (1, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za7v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (7, w0, p0, z0), ++ svwrite_ver_za64_m (7, w0, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za7v\.d\[\1, 1\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (7, w0 + 1, p0, z0), ++ 
svwrite_ver_za64_m (7, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0v\.d\[\1, 0\], p0/m, z1\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0, p0, z1), ++ svwrite_ver_za64_m (0, w0, p0, z1)) ++ ++/* ++** write_za64_u64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, ++ svwrite_ver_za64_u64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_f64_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.d\[\1, 0\], p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, ++ svwrite_ver_za64_f64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c +new file mode 100644 +index 000000000..dd6182821 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za8_s8_0_0_z0: ++** mov (w1[2-5]), (?:wzr|#?0) ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, 0, p0, z0), ++ svwrite_ver_za8_m (0, 0, p0, z0)) ++ ++/* ++** write_za8_s8_0_1_z0: ++** mov (w1[2-5]), #?1 ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, 1, p0, z0), ++ svwrite_ver_za8_m (0, 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0, p0, z0), ++ svwrite_ver_za8_m (0, w0, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p1_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.b\[\1, 1\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p15_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.b\[\1, 15\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 15, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 15, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p16_z0: ++** add (w1[2-5]), w0, #?16 ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 16, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 16, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0m1_z0: ++** sub (w1[2-5]), w0, #?1 ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za8_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z1: ++** mov (w1[2-5]), w0 ++** mova za0v\.b\[\1, 0\], p0/m, z1\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0, p0, z1), ++ svwrite_ver_za8_m (0, w0, p0, z1)) ++ ++/* ++** write_za8_u8_0_w0_z0: ++** mov (w1[2-5]), w0 ++** mova za0v\.b\[\1, 0\], p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, ++ svwrite_ver_za8_u8_m (0, w0, p0, z0), ++ svwrite_ver_za8_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c +new 
file mode 100644 +index 000000000..9ce7331eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c +@@ -0,0 +1,130 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** zero_mask_za_0: ++** zero { *} ++** ret ++*/ ++PROTO (zero_mask_za_0, void, ()) { svzero_mask_za (0); } ++ ++/* ++** zero_mask_za_01: ++** zero { za0\.d } ++** ret ++*/ ++PROTO (zero_mask_za_01, void, ()) { svzero_mask_za (0x01); } ++ ++/* ++** zero_mask_za_80: ++** zero { za7\.d } ++** ret ++*/ ++PROTO (zero_mask_za_80, void, ()) { svzero_mask_za (0x80); } ++ ++/* ++** zero_mask_za_03: ++** zero { za0\.d, za1\.d } ++** ret ++*/ ++PROTO (zero_mask_za_03, void, ()) { svzero_mask_za (0x03); } ++ ++/* ++** zero_mask_za_09: ++** zero { za0\.d, za3\.d } ++** ret ++*/ ++PROTO (zero_mask_za_09, void, ()) { svzero_mask_za (0x09); } ++ ++/* ++** zero_mask_za_0d: ++** zero { za0\.d, za2\.d, za3\.d } ++** ret ++*/ ++PROTO (zero_mask_za_0d, void, ()) { svzero_mask_za (0x0d); } ++ ++/* ++** zero_mask_za_3c: ++** zero { za2\.d, za3\.d, za4\.d, za5\.d } ++** ret ++*/ ++PROTO (zero_mask_za_3c, void, ()) { svzero_mask_za (0x3c); } ++ ++/* ++** zero_mask_za_5a: ++** zero { za1\.d, za3\.d, za4\.d, za6\.d } ++** ret ++*/ ++PROTO (zero_mask_za_5a, void, ()) { svzero_mask_za (0x5a); } ++ ++/* ++** zero_mask_za_11: ++** zero { za0\.s } ++** ret ++*/ ++PROTO (zero_mask_za_11, void, ()) { svzero_mask_za (0x11); } ++ ++/* ++** zero_mask_za_88: ++** zero { za3\.s } ++** ret ++*/ ++PROTO (zero_mask_za_88, void, ()) { svzero_mask_za (0x88); } ++ ++/* ++** zero_mask_za_33: ++** zero { za0\.s, za1\.s } ++** ret ++*/ ++PROTO (zero_mask_za_33, void, ()) { svzero_mask_za (0x33); } ++ ++/* ++** zero_mask_za_cc: ++** zero { za2\.s, za3\.s } ++** ret ++*/ ++PROTO (zero_mask_za_cc, void, ()) { svzero_mask_za (0xcc); } ++ ++/* ++** zero_mask_za_55: ++** zero { za0\.h } ++** ret ++*/ ++PROTO (zero_mask_za_55, void, ()) { svzero_mask_za (0x55); } ++ ++/* ++** zero_mask_za_aa: ++** zero { za1\.h } ++** ret ++*/ ++PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); } ++ ++/* ++** zero_mask_za_ab: ++** zero { za1\.h, za0\.d } ++** ret ++*/ ++PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); } ++ ++/* ++** zero_mask_za_d7: ++** zero { za0\.h, za1\.d, za7\.d } ++** ret ++*/ ++PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); } ++ ++/* ++** zero_mask_za_bf: ++** zero { za1\.h, za0\.s, za2\.d } ++** ret ++*/ ++PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); } ++ ++/* ++** zero_mask_za_ff: ++** zero { za } ++** ret ++*/ ++PROTO (zero_mask_za_ff, void, ()) { svzero_mask_za (0xff); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c +new file mode 100644 +index 000000000..4688d0950 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** zero_za: ++** zero { za } ++** ret ++*/ ++PROTO (zero_za, void, ()) { svzero_za (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +index d8916809b..84925b9bd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +@@ -12,11 +12,21 @@ + #endif + 
+ #ifdef STREAMING_COMPATIBLE +-#define ATTR __arm_streaming_compatible ++#define SM_ATTR __arm_streaming_compatible ++#elif defined(STREAMING) ++#define SM_ATTR __arm_streaming + #else +-#define ATTR ++#define SM_ATTR + #endif + ++#ifdef SHARED_ZA ++#define ZA_ATTR __arm_inout("za") ++#else ++#define ZA_ATTR ++#endif ++ ++#define ATTR SM_ATTR ZA_ATTR ++ + #ifdef __cplusplus + #define PROTO(NAME, RET, ARGS) \ + extern "C" RET NAME ARGS ATTR; RET NAME ARGS ATTR +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c +new file mode 100644 +index 000000000..fce1ef1dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svusmopa_za32_m (0, pg, pg, u8); /* { dg-error {too few arguments to function 'svusmopa_za32_m'} } */ ++ svusmopa_za32_m (0, pg, pg, u8, s8, 0); /* { dg-error {too many arguments to function 'svusmopa_za32_m'} } */ ++ svusmopa_za32_m (tile, pg, pg, u8, s8); /* { dg-error {argument 1 of 'svusmopa_za32_m' must be an integer constant expression} } */ ++ svusmopa_za32_m (-1, pg, pg, u8, s8); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svusmopa_za32_m (4, pg, pg, u8, s8); /* { dg-error {passing 4 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svusmopa_za32_m (0, u8, pg, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ ++ svusmopa_za32_m (0, pg, u8, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ ++ svusmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svusmopa_za32_m', which expects an SVE type} } */ ++ svusmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svint8_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, pg, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, f16, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 5 of 'svusmopa_za32_m', which expects a vector of signed integers} } */ ++ svusmopa_za32_m (0, pg, pg, u8, s16); /* { dg-error {arguments 4 and 5 of 'svusmopa_za32_m' must have the same element size, but the values passed here have type 'svuint8_t' and 'svint16_t' respectively} } */ ++ svusmopa_za32_m (0, pg, pg, u16, s16); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svuint16_t' arguments} } */ ++ ++ svusmopa_za64_m (0, pg, pg, u16, s16); /* { dg-error {ACLE function 'svusmopa_za64_u16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming ++{ ++ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") ++{ ++ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only 
be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16, svuint16_t u16) ++ __arm_streaming __arm_inout("za") ++{ ++ svusmopa_za64_m (-1, pg, pg, u16, s16); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svusmopa_za64_m (8, pg, pg, u16, s16); /* { dg-error {passing 8 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c +new file mode 100644 +index 000000000..7e91a41cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32, ++ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svmopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svmopa_za32_m'} } */ ++ svmopa_za32_m (0, pg, pg, s8, s8, 0); /* { dg-error {too many arguments to function 'svmopa_za32_m'} } */ ++ svmopa_za32_m (tile, pg, pg, s8, s8); /* { dg-error {argument 1 of 'svmopa_za32_m' must be an integer constant expression} } */ ++ svmopa_za32_m (-1, pg, pg, s8, s8); /* { dg-error {passing -1 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svmopa_za32_m (4, pg, pg, s8, s8); /* { dg-error {passing 4 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svmopa_za32_m (0, u8, pg, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svmopa_za32_m', which expects 'svbool_t'} } */ ++ svmopa_za32_m (0, pg, u8, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svmopa_za32_m', which expects 'svbool_t'} } */ ++ svmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svmopa_za32_m', which expects an SVE type} } */ ++ svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */ ++ svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */ ++ svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svmopa_za64_m (0, pg, pg, s16, s16); /* { dg-error {ACLE function 'svmopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) __arm_streaming ++{ ++ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC 
target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16) __arm_streaming __arm_inout("za") ++{ ++ svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c +new file mode 100644 +index 000000000..dfc1b737d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svfloat64_t f64) __arm_streaming __arm_inout("za") ++{ ++ svmopa_za64_m (0, pg, pg, f64, f64); /* { dg-error {ACLE function 'svmopa_za64_f64_m' requires ISA extension 'sme-f64f64'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c +new file mode 100644 +index 000000000..555f95a61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svsumopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svsumopa_za32_m'} } */ ++ svsumopa_za32_m (0, pg, pg, s8, u8, 0); /* { dg-error {too many arguments to function 'svsumopa_za32_m'} } */ ++ svsumopa_za32_m (tile, pg, pg, s8, u8); /* { dg-error {argument 1 of 'svsumopa_za32_m' must be an integer constant expression} } */ ++ svsumopa_za32_m (-1, pg, pg, s8, u8); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svsumopa_za32_m (4, pg, pg, s8, u8); /* { dg-error {passing 4 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svsumopa_za32_m (0, u8, pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsumopa_za32_m', which expects 'svbool_t'} } */ ++ svsumopa_za32_m (0, pg, u8, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsumopa_za32_m', which expects 'svbool_t'} } */ ++ svsumopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svsumopa_za32_m', which expects an SVE type} } */ ++ svsumopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svuint8_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, pg, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, f16, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 5 of 'svsumopa_za32_m', which expects a vector of unsigned integers} } */ ++ svsumopa_za32_m (0, pg, pg, s8, u16); /* { dg-error {arguments 4 and 5 of 'svsumopa_za32_m' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ ++ svsumopa_za32_m (0, pg, pg, s16, u16); /* { dg-error {'svsumopa_za32_m' has no form 
that takes 'svint16_t' arguments} } */ ++ ++ svsumopa_za64_m (0, pg, pg, s16, u16); /* { dg-error {ACLE function 'svsumopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming ++{ ++ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") ++{ ++ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16, svuint16_t u16) ++ __arm_streaming __arm_inout("za") ++{ ++ svsumopa_za64_m (-1, pg, pg, s16, u16); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svsumopa_za64_m (8, pg, pg, s16, u16); /* { dg-error {passing 8 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c +index 9591e3d01..5aa0ea671 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c +@@ -4,6 +4,7 @@ + to be diagnosed. Any attempt to call the function before including + arm_sve.h will lead to a link failure. (Same for taking its address, + etc.) */ +-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); ++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t) ++ __arm_streaming_compatible; + + #pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c +index 85923611d..ede9a8063 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c +@@ -8,6 +8,7 @@ + explicit definition "wins". This isn't supported behavior though. 
*/ + __SVUint8_t + svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++ __arm_streaming_compatible + { + return x; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c +new file mode 100644 +index 000000000..421979ea0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svread_hor_za8_m (s8, pg, 0); /* { dg-error {too few arguments to function 'svread_hor_za8_m'} } */ ++ svread_hor_za8_m (s8, pg, 0, 0, 0); /* { dg-error {too many arguments to function 'svread_hor_za8_m'} } */ ++ svread_hor_za8_m (tile, pg, 0, 0); /* { dg-error {passing 'uint32_t'.* to argument 1 of 'svread_hor_za8_m', which expects an SVE type} } */ ++ svread_hor_za8_m (pg, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svbool_t' arguments} } */ ++ svread_hor_za8_m (u16, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svuint16_t' arguments} } */ ++ svread_hor_za8_m (s8, s8, 0, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svread_hor_za8_m', which expects 'svbool_t'} } */ ++ svread_hor_za8_m (s8, pg, tile, 0); /* { dg-error {argument 3 of 'svread_hor_za8_m' must be an integer constant expression} } */ ++ svread_hor_za8_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ ++ svread_hor_za8_m (s8, pg, 1, 0); /* { dg-error {passing 1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ ++ svread_hor_za8_m (s8, pg, 0, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svread_hor_za8_m', which expects 'uint32_t'} } */ ++ ++ svread_hor_za16_m (u16, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */ ++ svread_hor_za16_m (u16, pg, 2, 0); /* { dg-error {passing 2 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */ ++ ++ svread_hor_za32_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svread_hor_za32_m (f32, pg, 4, 0); /* { dg-error {passing 4 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */ ++ ++ svread_hor_za64_m (s64, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svread_hor_za64_m (s64, pg, 8, 0); /* { dg-error {passing 8 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */ ++ ++ svread_hor_za128_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svread_hor_za128_m (s8, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svread_hor_za128_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svread_hor_za128_m (f32, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) 
__arm_streaming ++{ ++ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called when SME streaming mode is enabled} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c +new file mode 100644 +index 000000000..948ce2cb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint16_t s16, svint32_t s32, svint64_t s64, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svaddha_za32_m (0, pg, pg); /* { dg-error {too few arguments to function 'svaddha_za32_m'} } */ ++ svaddha_za32_m (0, pg, pg, s32, s32); /* { dg-error {too many arguments to function 'svaddha_za32_m'} } */ ++ svaddha_za32_m (tile, pg, pg, s32); /* { dg-error {argument 1 of 'svaddha_za32_m' must be an integer constant expression} } */ ++ svaddha_za32_m (-1, pg, pg, s32); /* { dg-error {passing -1 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svaddha_za32_m (4, pg, pg, s32); /* { dg-error {passing 4 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svaddha_za32_m (0, u8, pg, s32); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svaddha_za32_m', which expects 'svbool_t'} } */ ++ svaddha_za32_m (0, pg, u8, s32); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svaddha_za32_m', which expects 'svbool_t'} } */ ++ svaddha_za32_m (0, pg, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svaddha_za32_m', which expects an SVE type} } */ ++ svaddha_za32_m (0, pg, pg, pg); /* { dg-error {'svaddha_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, u8); /* { dg-error {'svaddha_za32_m' has no form that takes 'svuint8_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, s16); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint16_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, f32); /* { dg-error {'svaddha_za32_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, s64); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint64_t' arguments} } */ ++ ++ svaddha_za64_m (0, pg, pg, s64); /* { dg-error {ACLE function 'svaddha_za64_s64_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint32_t s32) __arm_streaming ++{ ++ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint32_t s32) __arm_inout("za") ++{ ++ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint64_t s64) ++ __arm_streaming __arm_inout("za") ++{ ++ svaddha_za64_m (-1, pg, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svaddha_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svaddha_za64_m (8, pg, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svaddha_za64_m', which 
expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c +new file mode 100644 +index 000000000..af79c406b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svwrite_ver_za8_m (0, 0, pg); /* { dg-error {too few arguments to function 'svwrite_ver_za8_m'} } */ ++ svwrite_ver_za8_m (0, 0, pg, s8, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_m'} } */ ++ svwrite_ver_za8_m (tile, 0, pg, s8); /* { dg-error {argument 1 of 'svwrite_ver_za8_m' must be an integer constant expression} } */ ++ svwrite_ver_za8_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ ++ svwrite_ver_za8_m (1, 0, pg, s8); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ ++ svwrite_ver_za8_m (0, u8, pg, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_m', which expects 'uint32_t'} } */ ++ svwrite_ver_za8_m (0, 0, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svwrite_ver_za8_m', which expects 'svbool_t'} } */ ++ svwrite_ver_za8_m (0, 0, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svwrite_ver_za8_m', which expects an SVE type} } */ ++ svwrite_ver_za8_m (0, 0, pg, pg); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svbool_t' arguments} } */ ++ svwrite_ver_za8_m (0, 0, pg, u16); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svuint16_t' arguments} } */ ++ ++ svwrite_ver_za16_m (-1, 0, pg, u16); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */ ++ svwrite_ver_za16_m (2, 0, pg, u16); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */ ++ ++ svwrite_ver_za32_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */ ++ svwrite_ver_za32_m (4, 0, pg, f32); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */ ++ ++ svwrite_ver_za64_m (-1, 0, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */ ++ svwrite_ver_za64_m (8, 0, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */ ++ ++ svwrite_ver_za128_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svwrite_ver_za128_m (16, 0, pg, s8); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svwrite_ver_za128_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ ++ svwrite_ver_za128_m (16, 0, pg, f32); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) __arm_streaming ++{ ++ 
svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called when SME streaming mode is enabled} } */ ++} +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index e2a9ef5fa..292737dae 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10622,7 +10622,8 @@ proc check_effective_target_aarch64_tiny { } { + # various architecture extensions via the .arch_extension pseudo-op. + + foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" +- "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" } { ++ "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" ++ "sme" "sme-i16i64" } { + eval [string map [list FUNC $aarch64_ext] { + proc check_effective_target_aarch64_asm_FUNC_ok { } { + if { [istarget aarch64*-*-*] } { +-- +2.33.0 + |