Diffstat (limited to '0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch')
-rw-r--r-- | 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch | 4324 |
1 file changed, 4324 insertions, 0 deletions
diff --git a/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch b/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
new file mode 100644
index 0000000..f15e7f6
--- /dev/null
+++ b/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
@@ -0,0 +1,4324 @@
+From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA
+ attributes
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f
+
+SME has an array called ZA that can be enabled and disabled separately
+from streaming mode.  A status bit called PSTATE.ZA indicates whether
+ZA is currently enabled or not.
+
+In C and C++, the state of PSTATE.ZA is controlled using function
+attributes.  There are four attributes that can be attached to
+function types to indicate that the function shares ZA with its
+caller.  These are:
+
+- arm::in("za")
+- arm::out("za")
+- arm::inout("za")
+- arm::preserves("za")
+
+If a function's type has one of these shared-ZA attributes,
+PSTATE.ZA is specified to be 1 on entry to the function and on return
+from the function.  Otherwise, the caller and callee have separate
+ZA contexts; they do not use ZA to share data.
+
+Although normal non-shared-ZA functions have a separate ZA context
+from their callers, nested uses of ZA are expected to be rare.
+The ABI therefore defines a cooperative lazy saving scheme that
+allows saves and restores of ZA to be kept to a minimum.
+(Callers still have the option of doing a full save and restore
+if they prefer.)
+
+Functions that want to use ZA internally have an arm::new("za")
+attribute, which tells the compiler to enable PSTATE.ZA for
+the duration of the function body.  It also tells the compiler
+to commit any lazy save initiated by a caller.
+
+The patch uses various abstract hard registers to track dataflow
+relating to ZA.  See the comments in the patch for details.
+
+The lazy save scheme is intended to be transparent to most normal
+functions, so that they don't need to be recompiled for SME.
+This is reflected in the way that most normal functions ignore
+the new hard registers added in the patch.
+
+As with arm::streaming and arm::streaming_compatible, the attributes are
+also available as __arm_<attr>.  This has two advantages: it triggers an
+error on compilers that don't understand the attributes, and it eases
+use on C, where [[...]] attributes were only added in C23.
+
+A short usage sketch of the attributes follows the ChangeLog below.
+
+gcc/
+	* config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode.
+	* config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p)
+	(aarch64_output_rdsvl, aarch64_optimize_mode_switching)
+	(aarch64_restore_za): Declare.
+	* config/aarch64/constraints.md (UsR): New constraint.
+	* config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM)
+	(SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM)
+	(ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants.
+	(LAST_FAKE_REGNUM): Likewise.
+	(UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs.
+	(arches): Add sme.
+	(arch_enabled): Handle it.
+	(*cb<optab><mode>1): Rename to...
+	(aarch64_cb<optab><mode>1): ...this.
+	(*movsi_aarch64): Add an alternative for RDSVL.
+	(*movdi_aarch64): Likewise.
+	(aarch64_save_nzcv, aarch64_restore_nzcv): New insns.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA)
+	(UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE)
+	(UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2)
+	(UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs.
+	(UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise.
+	(UNSPECV_ASM_UPDATE_ZA): New unspecv.
+	(aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za)
+	(aarch64_initial_zero_za, aarch64_setup_local_tpidr2)
+	(aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2)
+	(aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za)
+	(aarch64_start_private_za_call, aarch64_end_private_za_call)
+	(aarch64_commit_lazy_save): New patterns.
+	* config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros.
+	(FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers.
+	(CALL_USED_REGISTERS): Replace with...
+	(CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers.
+	(FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers.
+	(FAKE_REGS): New register class.
+	(REG_CLASS_NAMES): Update accordingly.
+	(REG_CLASS_CONTENTS): Likewise.
+	(machine_function::tpidr2_block): New member variable.
+	(machine_function::tpidr2_block_ptr): Likewise.
+	(machine_function::za_save_buffer): Likewise.
+	(machine_function::next_asm_update_za_id): Likewise.
+	(CUMULATIVE_ARGS::shared_za_flags): Likewise.
+	(aarch64_mode_entity, aarch64_local_sme_state): New enums.
+	(aarch64_tristate_mode): Likewise.
+	(OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define.
+	* config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN)
+	(AARCH64_STATE_OUT): New constants.
+	(aarch64_attribute_shared_state_flags): New function.
+	(aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state)
+	(aarch64_check_state_string, cmp_string_csts): Likewise.
+	(aarch64_merge_string_arguments, aarch64_check_arm_new_against_type)
+	(handle_arm_new, handle_arm_shared): Likewise.
+	(handle_arm_new_za_attribute): New.
+	(aarch64_arm_attribute_table): Add new, preserves, in, out, and inout.
+	(aarch64_hard_regno_nregs): Handle FAKE_REGS.
+	(aarch64_hard_regno_mode_ok): Likewise.
+	(aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions.
+	(aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za.
+	(aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions.
+	(aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za.
+	(aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags)
+	(aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions.
+	(aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise.
+	(aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise.
+	(aarch64_expand_mov_immediate): Handle RDSVL immediates.
+	(aarch64_function_arg): Add the ZA sharing flags as a third limb
+	of the PARALLEL.
+	(aarch64_init_cumulative_args): Record the ZA sharing flags.
+	(aarch64_extra_live_on_entry): New function.  Handle the new
+	ZA-related fake registers.
+	(aarch64_epilogue_uses): Handle the new ZA-related fake registers.
+	(aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants.
+	(aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions.
+	(aarch64_init_tpidr2_block, aarch64_restore_za): Likewise.
+	(aarch64_layout_frame): Check whether the current function creates
+	new ZA state.  Record that it clobbers LR if so.
+	(aarch64_expand_prologue): Handle functions that create new ZA state.
+	(aarch64_expand_epilogue): Likewise.
+	(aarch64_create_tpidr2_block): New function.
+	(aarch64_restore_za): Likewise.
+	(aarch64_start_call_args): Disallow calls to shared-ZA functions
+	from functions that have no ZA state.  Emit a marker instruction
+	before calls to private-ZA functions from functions that have
+	SME state.
+	(aarch64_expand_call): Add return registers for state that is
+	managed via attributes.  Record the use and clobber information
+	for the ZA registers.
+	(aarch64_end_call_args): New function.
+	(aarch64_regno_regclass): Handle FAKE_REGS.
+	(aarch64_class_max_nregs): Likewise.
+	(aarch64_override_options_internal): Require TARGET_SME for
+	functions that have ZA state.
+	(aarch64_conditional_register_usage): Handle FAKE_REGS.
+	(aarch64_mov_operand_p): Handle RDSVL immediates.
+	(aarch64_comp_type_attributes): Check that the ZA sharing flags
+	are equal.
+	(aarch64_merge_decl_attributes): New function.
+	(aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer)
+	(aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise.
+	(aarch64_insn_references_sme_state_p): Likewise.
+	(aarch64_mode_needed_local_sme_state): Likewise.
+	(aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise.
+	(aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise.
+	(aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise.
+	(aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise.
+	(aarch64_mode_backprop, aarch64_mode_entry): Likewise.
+	(aarch64_mode_exit, aarch64_mode_eh_handler): Likewise.
+	(aarch64_mode_priority, aarch64_md_asm_adjust): Likewise.
+	(TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define.
+	(TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise.
+	(TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise.
+	(TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise.
+	(TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise.
+	(TARGET_EXTRA_LIVE_ON_ENTRY): Likewise.
+	(TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust.
+	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
+	Define __arm_new, __arm_preserves, __arm_in, __arm_out, and __arm_inout.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/za_state_1.c: New test.
+	* gcc.target/aarch64/sme/za_state_2.c: Likewise.
+	* gcc.target/aarch64/sme/za_state_3.c: Likewise.
+	* gcc.target/aarch64/sme/za_state_4.c: Likewise.
+	* gcc.target/aarch64/sme/za_state_5.c: Likewise.
+	* gcc.target/aarch64/sme/za_state_6.c: Likewise.
+	* g++.target/aarch64/sme/exceptions_1.C: Likewise.
+	* gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros.
+	* g++.target/aarch64/sme/keyword_macros_1.C: Likewise.
+---
+ gcc/config/aarch64/aarch64-c.cc               |   32 +
+ gcc/config/aarch64/aarch64-isa-modes.def      |    5 +
+ gcc/config/aarch64/aarch64-protos.h           |    5 +
+ gcc/config/aarch64/aarch64-sme.md             |  287 ++++
+ gcc/config/aarch64/aarch64.cc                 | 1371 ++++++++++++++++-
+ gcc/config/aarch64/aarch64.h                  |   98 +-
+ gcc/config/aarch64/aarch64.md                 |   81 +-
+ gcc/config/aarch64/constraints.md             |    6 +
+ .../g++.target/aarch64/sme/exceptions_1.C     |  189 +++
+ .../g++.target/aarch64/sme/keyword_macros_1.C |    5 +
+ .../gcc.target/aarch64/sme/keyword_macros_1.c |    5 +
+ .../gcc.target/aarch64/sme/za_state_1.c       |  154 ++
+ .../gcc.target/aarch64/sme/za_state_2.c       |   73 +
+ .../gcc.target/aarch64/sme/za_state_3.c       |   31 +
+ .../gcc.target/aarch64/sme/za_state_4.c       |  585 +++++++
+ .../gcc.target/aarch64/sme/za_state_5.c       |  595 +++++++
+ .../gcc.target/aarch64/sme/za_state_6.c       |   23 +
+ 17 files changed, 3523 insertions(+), 22 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index 397745fbd..76c20848f 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -73,6 +73,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
+
+   builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
+
++  builtin_define ("__ARM_STATE_ZA");
++
+   /* Define keyword attributes like __arm_streaming as macros that expand
+      to the associated [[...]] attribute.  Use __extension__ in the attribute
+      for C, since the [[...]] syntax was only added in C23.  */
+@@ -86,6 +88,36 @@
+   DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
+
+ #undef DEFINE_ARM_KEYWORD_MACRO
++
++  /* Same for the keyword attributes that take arguments.  The snag here
++     is that some old modes warn about or reject variadic arguments.  */
++  auto *cpp_opts = cpp_get_options (parse_in);
++  if (!cpp_opts->traditional)
++    {
++      auto old_warn_variadic_macros = cpp_opts->warn_variadic_macros;
++      auto old_cpp_warn_c90_c99_compat = cpp_opts->cpp_warn_c90_c99_compat;
++
++      cpp_opts->warn_variadic_macros = false;
++      cpp_opts->cpp_warn_c90_c99_compat = 0;
++
++#define DEFINE_ARM_KEYWORD_MACRO_ARGS(NAME) \
++  builtin_define_with_value ("__arm_" NAME "(...)", \
++			     lang_GNU_CXX () \
++			     ?
"[[arm::" NAME "(__VA_ARGS__)]]" \ ++ : "[[__extension__ arm::" NAME \ ++ "(__VA_ARGS__)]]", 0); ++ ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("new"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("preserves"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("in"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("out"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("inout"); ++ ++#undef DEFINE_ARM_KEYWORD_MACRO_ARGS ++ ++ cpp_opts->warn_variadic_macros = old_warn_variadic_macros; ++ cpp_opts->cpp_warn_c90_c99_compat = old_cpp_warn_c90_c99_compat; ++ } + } + + /* Undefine/redefine macros that depend on the current backend state and may +diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def +index 5915c98a8..c0ada35bd 100644 +--- a/gcc/config/aarch64/aarch64-isa-modes.def ++++ b/gcc/config/aarch64/aarch64-isa-modes.def +@@ -32,4 +32,9 @@ + DEF_AARCH64_ISA_MODE(SM_ON) + DEF_AARCH64_ISA_MODE(SM_OFF) + ++/* Indicates that PSTATE.ZA is known to be 1. The converse is that ++ PSTATE.ZA might be 0 or 1, depending on whether there is an uncommitted ++ lazy save. */ ++DEF_AARCH64_ISA_MODE(ZA_ON) ++ + #undef DEF_AARCH64_ISA_MODE +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 737f47026..0883ddd1a 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -808,6 +808,8 @@ bool aarch64_sve_addvl_addpl_immediate_p (rtx); + bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); + void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); ++bool aarch64_rdsvl_immediate_p (const_rtx); ++char *aarch64_output_rdsvl (const_rtx); + bool aarch64_mov_operand_p (rtx, machine_mode); + rtx aarch64_reverse_mask (machine_mode, unsigned int); + bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); +@@ -1083,4 +1085,7 @@ extern bool aarch64_harden_sls_blr_p (void); + + extern void aarch64_output_patchable_area (unsigned int, bool); + ++bool aarch64_optimize_mode_switching (aarch64_mode_entity); ++void aarch64_restore_za (rtx); ++ + #endif /* GCC_AARCH64_PROTOS_H */ +diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md +index 52427b4f1..d4973098e 100644 +--- a/gcc/config/aarch64/aarch64-sme.md ++++ b/gcc/config/aarch64/aarch64-sme.md +@@ -23,6 +23,7 @@ + ;; == State management + ;; ---- Test current state + ;; ---- PSTATE.SM management ++;; ---- PSTATE.ZA management + + ;; ========================================================================= + ;; == State management +@@ -169,3 +170,289 @@ + "" + "smstop\tsm" + ) ++ ++;; ------------------------------------------------------------------------- ++;; ---- PSTATE.ZA management ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMSTART ZA ++;; - SMSTOP ZA ++;; plus calls to support routines. ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" [ ++ UNSPEC_SMSTOP_ZA ++ UNSPEC_INITIAL_ZERO_ZA ++ UNSPEC_TPIDR2_SAVE ++ UNSPEC_TPIDR2_RESTORE ++ UNSPEC_READ_TPIDR2 ++ UNSPEC_WRITE_TPIDR2 ++ UNSPEC_SETUP_LOCAL_TPIDR2 ++ UNSPEC_RESTORE_ZA ++ UNSPEC_START_PRIVATE_ZA_CALL ++ UNSPEC_END_PRIVATE_ZA_CALL ++ UNSPEC_COMMIT_LAZY_SAVE ++]) ++ ++(define_c_enum "unspecv" [ ++ UNSPECV_ASM_UPDATE_ZA ++]) ++ ++;; Use the ABI-defined routine to commit an uncommitted lazy save. ++;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM. 
++;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming ++;; value of the architected TPIDR2_EL0. ++(define_insn "aarch64_tpidr2_save" ++ [(set (reg:DI ZA_FREE_REGNUM) ++ (unspec:DI [(reg:DI SME_STATE_REGNUM) ++ (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE)) ++ (clobber (reg:DI R14_REGNUM)) ++ (clobber (reg:DI R15_REGNUM)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM))] ++ "" ++ "bl\t__arm_tpidr2_save" ++) ++ ++;; Set PSTATE.ZA to 1. If ZA was previously dormant or active, ++;; it remains in the same state afterwards, with the same contents. ++;; Otherwise, it goes from off to on with zeroed contents. ++;; ++;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved ++;; up past this instruction, since that could create an invalid ++;; combination of having an active lazy save while ZA is off. ++;; Create an anti-dependence by reading the current contents ++;; of TPIDR2_SETUP_REGNUM. ++;; ++;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging ++;; to the caller have already been saved. That isn't necessary for this ++;; instruction itself, since PSTATE.ZA is already 1 if it contains data. ++;; But doing this here means that other uses of ZA can just depend on ++;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM. ++(define_insn "aarch64_smstart_za" ++ [(set (reg:DI SME_STATE_REGNUM) ++ (const_int 1)) ++ (use (reg:DI TPIDR2_SETUP_REGNUM)) ++ (use (reg:DI ZA_FREE_REGNUM))] ++ "" ++ "smstart\tza" ++) ++ ++;; Disable ZA and discard its current contents. ++;; ++;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA ++;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past ++;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this. ++;; ++;; We can only turn ZA off once we know that it is free (i.e. doesn't ++;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM ++;; to ensure this. ++;; ++;; We only turn ZA off when the current function's ZA state is dead, ++;; or perhaps if we're sure that the contents are saved. Either way, ++;; we know whether ZA is saved or not. ++(define_insn "aarch64_smstop_za" ++ [(set (reg:DI SME_STATE_REGNUM) ++ (const_int 0)) ++ (set (reg:DI ZA_SAVED_REGNUM) ++ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) ++ (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))] ++ "" ++ "smstop\tza" ++) ++ ++;; Zero ZA after committing a lazy save. The sequencing is enforced ++;; by reading ZA_FREE_REGNUM. ++(define_insn "aarch64_initial_zero_za" ++ [(set (reg:DI ZA_REGNUM) ++ (unspec:DI [(reg:DI SME_STATE_REGNUM) ++ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))] ++ "" ++ "zero\t{ za }" ++) ++ ++;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of ++;; the current function's TPIDR2 block. Other instructions can then ++;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block. ++(define_insn "aarch64_setup_local_tpidr2" ++ [(set (reg:DI TPIDR2_BLOCK_REGNUM) ++ (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")] ++ UNSPEC_SETUP_LOCAL_TPIDR2))] ++ "" ++ "" ++ [(set_attr "type" "no_insn")] ++) ++ ++;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save. ++(define_insn "aarch64_clear_tpidr2" ++ [(set (reg:DI TPIDR2_SETUP_REGNUM) ++ (const_int 0))] ++ "" ++ "msr\ttpidr2_el0, xzr" ++) ++ ++;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address ++;; is given by operand 0. 
TPIDR2_BLOCK_REGNUM represents the contents of the ++;; pointed-to block. ++(define_insn "aarch64_write_tpidr2" ++ [(set (reg:DI TPIDR2_SETUP_REGNUM) ++ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") ++ (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))] ++ "" ++ "msr\ttpidr2_el0, %0" ++) ++ ++;; Check whether ZA has been saved. The system depends on the value that ++;; we wrote to TPIDR2_EL0 previously, so it depends on TPDIR2_SETUP_REGNUM. ++(define_insn "aarch64_read_tpidr2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) ++ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))] ++ "" ++ "mrs\t%0, tpidr2_el0" ++) ++ ++;; Use the ABI-defined routine to restore lazy-saved ZA contents ++;; from the TPIDR2 block pointed to by X0. ZA must already be active. ++(define_insn "aarch64_tpidr2_restore" ++ [(set (reg:DI ZA_SAVED_REGNUM) ++ (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE)) ++ (set (reg:DI SME_STATE_REGNUM) ++ (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE)) ++ (clobber (reg:DI R14_REGNUM)) ++ (clobber (reg:DI R15_REGNUM)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM))] ++ "" ++ "bl\t__arm_tpidr2_restore" ++) ++ ++;; Check whether a lazy save set up by aarch64_save_za was committed ++;; and restore the saved contents if so. ++;; ++;; Operand 0 is the address of the current function's TPIDR2 block. ++(define_insn_and_split "aarch64_restore_za" ++ [(set (reg:DI ZA_SAVED_REGNUM) ++ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") ++ (reg:DI SME_STATE_REGNUM) ++ (reg:DI TPIDR2_SETUP_REGNUM) ++ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA)) ++ (clobber (reg:DI R0_REGNUM)) ++ (clobber (reg:DI R14_REGNUM)) ++ (clobber (reg:DI R15_REGNUM)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM))] ++ "" ++ "#" ++ "&& epilogue_completed" ++ [(const_int 0)] ++ { ++ auto label = gen_label_rtx (); ++ auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); ++ emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); ++ auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label)); ++ JUMP_LABEL (jump) = label; ++ ++ aarch64_restore_za (operands[0]); ++ emit_label (label); ++ DONE; ++ } ++) ++ ++;; This instruction is emitted after asms that alter ZA, in order to model ++;; the effect on dataflow. The asm itself can't have ZA as an input or ++;; an output, since there is no associated data type. Instead it retains ++;; the original "za" clobber, which on its own would indicate that ZA ++;; is dead. ++;; ++;; The operand is a unique identifier. ++(define_insn "aarch64_asm_update_za" ++ [(set (reg:VNx16QI ZA_REGNUM) ++ (unspec_volatile:VNx16QI ++ [(reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand 0 "const_int_operand")] ++ UNSPECV_ASM_UPDATE_ZA))] ++ "" ++ "" ++ [(set_attr "type" "no_insn")] ++) ++ ++;; This pseudo-instruction is emitted as part of a call to a private-ZA ++;; function from a function with ZA state. It marks a natural place to set ++;; up a lazy save, if that turns out to be necessary. The save itself ++;; is managed by the mode-switching pass. 
++(define_insn "aarch64_start_private_za_call" ++ [(set (reg:DI LOWERING_REGNUM) ++ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))] ++ "" ++ "" ++ [(set_attr "type" "no_insn")] ++) ++ ++;; This pseudo-instruction is emitted as part of a call to a private-ZA ++;; function from a function with ZA state. It marks a natural place to restore ++;; the current function's ZA contents from the lazy save buffer, if that ++;; turns out to be necessary. The save itself is managed by the ++;; mode-switching pass. ++(define_insn "aarch64_end_private_za_call" ++ [(set (reg:DI LOWERING_REGNUM) ++ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))] ++ "" ++ "" ++ [(set_attr "type" "no_insn")] ++) ++ ++;; This pseudo-instruction is emitted before a private-ZA function uses ++;; PSTATE.ZA state for the first time. The instruction checks whether ++;; ZA currently contains data belonging to a caller and commits the ++;; lazy save if so. ++;; ++;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero ++;; if ZA is live, and should therefore be zeroed after committing a save. ++;; ++;; The instruction is generated by the mode-switching pass. It is a ++;; define_insn_and_split rather than a define_expand because of the ++;; internal control flow. ++(define_insn_and_split "aarch64_commit_lazy_save" ++ [(set (reg:DI ZA_FREE_REGNUM) ++ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") ++ (match_operand 1 "const_int_operand") ++ (reg:DI SME_STATE_REGNUM) ++ (reg:DI TPIDR2_SETUP_REGNUM) ++ (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE)) ++ (set (reg:DI ZA_REGNUM) ++ (unspec:DI [(reg:DI SME_STATE_REGNUM) ++ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA)) ++ (clobber (reg:DI R14_REGNUM)) ++ (clobber (reg:DI R15_REGNUM)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM))] ++ "" ++ "#" ++ "true" ++ [(const_int 0)] ++ { ++ auto label = gen_label_rtx (); ++ auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label)); ++ JUMP_LABEL (jump) = label; ++ emit_insn (gen_aarch64_tpidr2_save ()); ++ emit_insn (gen_aarch64_clear_tpidr2 ()); ++ if (INTVAL (operands[1]) != 0) ++ emit_insn (gen_aarch64_initial_zero_za ()); ++ emit_label (label); ++ DONE; ++ } ++) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 82f8e574e..a6e996c5b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -91,6 +91,26 @@ + /* Defined for convenience. */ + #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + ++/* Flags that describe how a function shares certain architectural state ++ with its callers. ++ ++ - AARCH64_STATE_SHARED indicates that the function does share the state ++ with callers. ++ ++ - AARCH64_STATE_IN indicates that the function reads (or might read) the ++ incoming state. The converse is that the function ignores the incoming ++ state. ++ ++ - AARCH64_STATE_OUT indicates that the function returns new state. ++ The converse is that the state on return is the same as it was on entry. ++ ++ A function that partially modifies the state treats it as both IN ++ and OUT (because the value on return depends to some extent on the ++ value on input). */ ++constexpr auto AARCH64_STATE_SHARED = 1U << 0; ++constexpr auto AARCH64_STATE_IN = 1U << 1; ++constexpr auto AARCH64_STATE_OUT = 1U << 2; ++ + /* Information about a legitimate vector immediate operand. 
*/ + struct simd_immediate_info + { +@@ -2959,6 +2979,151 @@ static const struct processor all_cores[] = + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + ++/* If NAME is the name of an arm:: attribute that describes shared state, ++ return its associated AARCH64_STATE_* flags, otherwise return 0. */ ++static unsigned int ++aarch64_attribute_shared_state_flags (const char *name) ++{ ++ if (strcmp (name, "in") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_IN; ++ if (strcmp (name, "inout") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_IN | AARCH64_STATE_OUT; ++ if (strcmp (name, "out") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_OUT; ++ if (strcmp (name, "preserves") == 0) ++ return AARCH64_STATE_SHARED; ++ return 0; ++} ++ ++/* See whether attribute list ATTRS has any sharing information ++ for state STATE_NAME. Return the associated state flags if so, ++ otherwise return 0. */ ++static unsigned int ++aarch64_lookup_shared_state_flags (tree attrs, const char *state_name) ++{ ++ for (tree attr = attrs; attr; attr = TREE_CHAIN (attr)) ++ { ++ if (!cxx11_attribute_p (attr)) ++ continue; ++ ++ auto ns = IDENTIFIER_POINTER (TREE_PURPOSE (TREE_PURPOSE (attr))); ++ if (strcmp (ns, "arm") != 0) ++ continue; ++ ++ auto attr_name = IDENTIFIER_POINTER (TREE_VALUE (TREE_PURPOSE (attr))); ++ auto flags = aarch64_attribute_shared_state_flags (attr_name); ++ if (!flags) ++ continue; ++ ++ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (TREE_CODE (value) == STRING_CST ++ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) ++ return flags; ++ } ++ } ++ return 0; ++} ++ ++/* Return true if DECL creates a new scope for state STATE_STRING. */ ++static bool ++aarch64_fndecl_has_new_state (const_tree decl, const char *state_name) ++{ ++ if (tree attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl))) ++ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (TREE_CODE (value) == STRING_CST ++ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) ++ return true; ++ } ++ return false; ++} ++ ++/* Return true if attribute argument VALUE is a recognized state string, ++ otherwise report an error. NAME is the name of the attribute to which ++ VALUE is being passed. */ ++static bool ++aarch64_check_state_string (tree name, tree value) ++{ ++ if (TREE_CODE (value) != STRING_CST) ++ { ++ error ("the arguments to %qE must be constant strings", name); ++ return false; ++ } ++ ++ const char *state_name = TREE_STRING_POINTER (value); ++ if (strcmp (state_name, "za") != 0) ++ { ++ error ("unrecognized state string %qs", state_name); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* qsort callback to compare two STRING_CSTs. */ ++static int ++cmp_string_csts (const void *a, const void *b) ++{ ++ return strcmp (TREE_STRING_POINTER (*(const_tree const *) a), ++ TREE_STRING_POINTER (*(const_tree const *) b)); ++} ++ ++/* Canonicalize a list of state strings. ARGS contains the arguments to ++ a new attribute while OLD_ATTR, if nonnull, contains a previous attribute ++ of the same type. If CAN_MERGE_IN_PLACE, it is safe to adjust OLD_ATTR's ++ arguments and drop the new attribute. Otherwise, the new attribute must ++ be kept and ARGS must include the information in OLD_ATTR. ++ ++ In both cases, the new arguments must be a sorted list of state strings ++ with duplicates removed. 
++ ++ Return true if new attribute should be kept, false if it should be ++ dropped. */ ++static bool ++aarch64_merge_string_arguments (tree args, tree old_attr, ++ bool can_merge_in_place) ++{ ++ /* Get a sorted list of all state strings (including duplicates). */ ++ auto add_args = [](vec<tree> &strings, const_tree args) ++ { ++ for (const_tree arg = args; arg; arg = TREE_CHAIN (arg)) ++ if (TREE_CODE (TREE_VALUE (arg)) == STRING_CST) ++ strings.safe_push (TREE_VALUE (arg)); ++ }; ++ auto_vec<tree, 16> strings; ++ add_args (strings, args); ++ if (old_attr) ++ add_args (strings, TREE_VALUE (old_attr)); ++ strings.qsort (cmp_string_csts); ++ ++ /* The list can be empty if there was no previous attribute and if all ++ the new arguments are erroneous. Drop the attribute in that case. */ ++ if (strings.is_empty ()) ++ return false; ++ ++ /* Destructively modify one of the argument lists, removing duplicates ++ on the fly. */ ++ bool use_old_attr = old_attr && can_merge_in_place; ++ tree *end = use_old_attr ? &TREE_VALUE (old_attr) : &args; ++ tree prev = NULL_TREE; ++ for (tree arg : strings) ++ { ++ if (prev && simple_cst_equal (arg, prev)) ++ continue; ++ prev = arg; ++ if (!*end) ++ *end = tree_cons (NULL_TREE, arg, NULL_TREE); ++ else ++ TREE_VALUE (*end) = arg; ++ end = &TREE_CHAIN (*end); ++ } ++ *end = NULL_TREE; ++ return !use_old_attr; ++} ++ + /* Check whether an 'aarch64_vector_pcs' attribute is valid. */ + + static tree +@@ -2987,6 +3152,101 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, + gcc_unreachable (); + } + ++/* Return true if arm::new(ARGS) is compatible with the type of decl DECL, ++ otherwise report an error. */ ++static bool ++aarch64_check_arm_new_against_type (tree args, tree decl) ++{ ++ tree type_attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); ++ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (TREE_CODE (value) == STRING_CST) ++ { ++ const char *state_name = TREE_STRING_POINTER (value); ++ if (aarch64_lookup_shared_state_flags (type_attrs, state_name)) ++ { ++ error_at (DECL_SOURCE_LOCATION (decl), ++ "cannot create a new %qs scope since %qs is shared" ++ " with callers", state_name, state_name); ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ ++/* Callback for arm::new attributes. */ ++static tree ++handle_arm_new (tree *node, tree name, tree args, int, bool *no_add_attrs) ++{ ++ tree decl = *node; ++ if (TREE_CODE (decl) != FUNCTION_DECL) ++ { ++ error ("%qE attribute applies only to function definitions", name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ if (TREE_TYPE (decl) == error_mark_node) ++ { ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ ++ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) ++ aarch64_check_state_string (name, TREE_VALUE (arg)); ++ ++ if (!aarch64_check_arm_new_against_type (args, decl)) ++ { ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ ++ /* If there is an old attribute, we should try to update it in-place, ++ so that there is only one (definitive) arm::new attribute on the decl. */ ++ tree old_attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl)); ++ if (!aarch64_merge_string_arguments (args, old_attr, true)) ++ *no_add_attrs = true; ++ ++ return NULL_TREE; ++} ++ ++/* Callback for arm::{in,out,inout,preserves} attributes. 
*/ ++static tree ++handle_arm_shared (tree *node, tree name, tree args, ++ int, bool *no_add_attrs) ++{ ++ tree type = *node; ++ tree old_attrs = TYPE_ATTRIBUTES (type); ++ auto flags = aarch64_attribute_shared_state_flags (IDENTIFIER_POINTER (name)); ++ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (aarch64_check_state_string (name, value)) ++ { ++ const char *state_name = TREE_STRING_POINTER (value); ++ auto old_flags = aarch64_lookup_shared_state_flags (old_attrs, ++ state_name); ++ if (old_flags && old_flags != flags) ++ { ++ error ("inconsistent attributes for state %qs", state_name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ } ++ } ++ ++ /* We can't update an old attribute in-place, since types are shared. ++ Instead make sure that this new attribute contains all the ++ information, so that the old attribute becomes redundant. */ ++ tree old_attr = lookup_attribute ("arm", IDENTIFIER_POINTER (name), ++ old_attrs); ++ if (!aarch64_merge_string_arguments (args, old_attr, false)) ++ *no_add_attrs = true; ++ ++ return NULL_TREE; ++} ++ + /* Mutually-exclusive function type attributes for controlling PSTATE.SM. */ + static const struct attribute_spec::exclusions attr_streaming_exclusions[] = + { +@@ -3023,6 +3283,16 @@ static const attribute_spec aarch64_arm_attributes[] = + NULL, attr_streaming_exclusions }, + { "streaming_compatible", 0, 0, false, true, true, true, + NULL, attr_streaming_exclusions }, ++ { "new", 1, -1, true, false, false, false, ++ handle_arm_new, NULL }, ++ { "preserves", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "in", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "out", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "inout", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL } + }; + + static const scoped_attribute_specs aarch64_arm_attribute_table = +@@ -4202,6 +4472,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) + case PR_HI_REGS: + case FFR_REGS: + case PR_AND_FFR_REGS: ++ case FAKE_REGS: + return 1; + default: + return CEIL (lowest_size, UNITS_PER_WORD); +@@ -4232,6 +4503,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) + if (pr_or_ffr_regnum_p (regno)) + return false; + ++ /* These registers are abstract; their modes don't matter. */ ++ if (FAKE_REGNUM_P (regno)) ++ return true; ++ + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the + global register variable associated with the stack pointer +@@ -4352,12 +4627,34 @@ aarch64_fntype_pstate_sm (const_tree fntype) + return AARCH64_FL_SM_OFF; + } + ++/* Return state flags that describe whether and how functions of type ++ FNTYPE share state STATE_NAME with their callers. */ ++ ++static unsigned int ++aarch64_fntype_shared_flags (const_tree fntype, const char *state_name) ++{ ++ return aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (fntype), ++ state_name); ++} ++ ++/* Return the state of PSTATE.ZA on entry to functions of type FNTYPE. */ ++ ++static aarch64_feature_flags ++aarch64_fntype_pstate_za (const_tree fntype) ++{ ++ if (aarch64_fntype_shared_flags (fntype, "za")) ++ return AARCH64_FL_ZA_ON; ++ ++ return 0; ++} ++ + /* Return the ISA mode on entry to functions of type FNTYPE. 
*/ + + static aarch64_feature_flags + aarch64_fntype_isa_mode (const_tree fntype) + { +- return aarch64_fntype_pstate_sm (fntype); ++ return (aarch64_fntype_pstate_sm (fntype) ++ | aarch64_fntype_pstate_za (fntype)); + } + + /* Return the state of PSTATE.SM when compiling the body of +@@ -4370,13 +4667,37 @@ aarch64_fndecl_pstate_sm (const_tree fndecl) + return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); + } + ++/* Return true if function FNDECL has state STATE_NAME, either by creating ++ new state itself or by sharing state with callers. */ ++ ++static bool ++aarch64_fndecl_has_state (tree fndecl, const char *state_name) ++{ ++ return (aarch64_fndecl_has_new_state (fndecl, state_name) ++ || aarch64_fntype_shared_flags (TREE_TYPE (fndecl), ++ state_name) != 0); ++} ++ ++/* Return the state of PSTATE.ZA when compiling the body of function FNDECL. ++ This might be different from the state of PSTATE.ZA on entry. */ ++ ++static aarch64_feature_flags ++aarch64_fndecl_pstate_za (const_tree fndecl) ++{ ++ if (aarch64_fndecl_has_new_state (fndecl, "za")) ++ return AARCH64_FL_ZA_ON; ++ ++ return aarch64_fntype_pstate_za (TREE_TYPE (fndecl)); ++} ++ + /* Return the ISA mode that should be used to compile the body of + function FNDECL. */ + + static aarch64_feature_flags + aarch64_fndecl_isa_mode (const_tree fndecl) + { +- return aarch64_fndecl_pstate_sm (fndecl); ++ return (aarch64_fndecl_pstate_sm (fndecl) ++ | aarch64_fndecl_pstate_za (fndecl)); + } + + /* Return the state of PSTATE.SM on entry to the current function. +@@ -4389,6 +4710,44 @@ aarch64_cfun_incoming_pstate_sm () + return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl)); + } + ++/* Return the state of PSTATE.ZA on entry to the current function. ++ This might be different from the state of PSTATE.ZA in the function ++ body. */ ++ ++static aarch64_feature_flags ++aarch64_cfun_incoming_pstate_za () ++{ ++ return aarch64_fntype_pstate_za (TREE_TYPE (cfun->decl)); ++} ++ ++/* Return state flags that describe whether and how the current function shares ++ state STATE_NAME with callers. */ ++ ++static unsigned int ++aarch64_cfun_shared_flags (const char *state_name) ++{ ++ return aarch64_fntype_shared_flags (TREE_TYPE (cfun->decl), state_name); ++} ++ ++/* Return true if the current function creates new state of type STATE_NAME ++ (as opposed to sharing the state with its callers or ignoring the state ++ altogether). */ ++ ++static bool ++aarch64_cfun_has_new_state (const char *state_name) ++{ ++ return aarch64_fndecl_has_new_state (cfun->decl, state_name); ++} ++ ++/* Return true if the current function has state STATE_NAME, either by ++ creating new state itself or by sharing state with callers. */ ++ ++static bool ++aarch64_cfun_has_state (const char *state_name) ++{ ++ return aarch64_fndecl_has_state (cfun->decl, state_name); ++} ++ + /* Return true if a call from the current function to a function with + ISA mode CALLEE_MODE would involve a change to PSTATE.SM around + the BL instruction. */ +@@ -5952,6 +6311,74 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + factor, nelts_per_vq); + } + ++/* Return a constant that represents FACTOR multiplied by the ++ number of 128-bit quadwords in an SME vector. ISA_MODE is the ++ ISA mode in which the calculation is being performed. 
*/ ++ ++static rtx ++aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor, ++ aarch64_feature_flags isa_mode) ++{ ++ gcc_assert (aarch64_sve_rdvl_factor_p (factor)); ++ if (isa_mode & AARCH64_FL_SM_ON) ++ /* We're in streaming mode, so we can use normal poly-int values. */ ++ return gen_int_mode ({ factor, factor }, mode); ++ ++ rtvec vec = gen_rtvec (1, gen_int_mode (factor, SImode)); ++ rtx unspec = gen_rtx_UNSPEC (mode, vec, UNSPEC_SME_VQ); ++ return gen_rtx_CONST (mode, unspec); ++} ++ ++/* Return true if X is a constant that represents some number X ++ multiplied by the number of quadwords in an SME vector. Store this X ++ in *FACTOR if so. */ ++ ++static bool ++aarch64_sme_vq_unspec_p (const_rtx x, HOST_WIDE_INT *factor) ++{ ++ if (!TARGET_SME || GET_CODE (x) != CONST) ++ return false; ++ ++ x = XEXP (x, 0); ++ if (GET_CODE (x) != UNSPEC ++ || XINT (x, 1) != UNSPEC_SME_VQ ++ || XVECLEN (x, 0) != 1) ++ return false; ++ ++ x = XVECEXP (x, 0, 0); ++ if (!CONST_INT_P (x)) ++ return false; ++ ++ *factor = INTVAL (x); ++ return true; ++} ++ ++/* Return true if X is a constant that represents some number Y ++ multiplied by the number of quadwords in an SME vector, and if ++ that Y is in the range of RDSVL. */ ++ ++bool ++aarch64_rdsvl_immediate_p (const_rtx x) ++{ ++ HOST_WIDE_INT factor; ++ return (aarch64_sme_vq_unspec_p (x, &factor) ++ && aarch64_sve_rdvl_factor_p (factor)); ++} ++ ++/* Return the asm string for an RDSVL instruction that calculates X, ++ which is a constant that satisfies aarch64_rdsvl_immediate_p. */ ++ ++char * ++aarch64_output_rdsvl (const_rtx x) ++{ ++ gcc_assert (aarch64_rdsvl_immediate_p (x)); ++ static char buffer[sizeof ("rdsvl\t%x0, #-") + 3 * sizeof (int)]; ++ x = XVECEXP (XEXP (x, 0), 0, 0); ++ snprintf (buffer, sizeof (buffer), "rdsvl\t%%x0, #%d", ++ (int) INTVAL (x) / 16); ++ return buffer; ++} ++ + /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ + + static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = +@@ -7717,6 +8144,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + return; + } + ++ if (aarch64_rdsvl_immediate_p (base)) ++ { ++ /* We could handle non-constant offsets if they are ever ++ generated. */ ++ gcc_assert (const_offset == 0); ++ emit_insn (gen_rtx_SET (dest, imm)); ++ return; ++ } ++ + sty = aarch64_classify_symbol (base, const_offset); + switch (sty) + { +@@ -8732,8 +9168,10 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) + rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode, + pcum->pcs_variant); + rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum); +- return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie, +- sme_mode_switch_args)); ++ rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode); ++ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie, ++ sme_mode_switch_args, ++ shared_za_flags)); + } + + aarch64_layout_arg (pcum_v, arg); +@@ -8744,7 +9182,7 @@ void + aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + const_tree fntype, + rtx libname ATTRIBUTE_UNUSED, +- const_tree fndecl ATTRIBUTE_UNUSED, ++ const_tree fndecl, + unsigned n_named ATTRIBUTE_UNUSED, + bool silent_p) + { +@@ -8769,6 +9207,8 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + pcum->silent_p = silent_p; ++ pcum->shared_za_flags ++ = (fntype ? 
aarch64_fntype_shared_flags (fntype, "za") : 0U); + pcum->num_sme_mode_switch_args = 0; + + if (!silent_p +@@ -10803,14 +11243,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + } + } + ++/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */ ++ ++void ++aarch64_extra_live_on_entry (bitmap regs) ++{ ++ if (TARGET_ZA) ++ { ++ bitmap_set_bit (regs, LOWERING_REGNUM); ++ bitmap_set_bit (regs, SME_STATE_REGNUM); ++ bitmap_set_bit (regs, TPIDR2_SETUP_REGNUM); ++ bitmap_set_bit (regs, ZA_FREE_REGNUM); ++ bitmap_set_bit (regs, ZA_SAVED_REGNUM); ++ ++ /* The only time ZA can't have live contents on entry is when ++ the function explicitly treats it as a pure output. */ ++ auto za_flags = aarch64_cfun_shared_flags ("za"); ++ if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT)) ++ bitmap_set_bit (regs, ZA_REGNUM); ++ } ++} ++ + /* Return 1 if the register is used by the epilogue. We need to say the + return register is used, but only after epilogue generation is complete. + Note that in the case of sibcalls, the values "used by the epilogue" are +- considered live at the start of the called function. +- +- For SIMD functions we need to return 1 for FP registers that are saved and +- restored by a function but are not zero in call_used_regs. If we do not do +- this optimizations may remove the restore of the register. */ ++ considered live at the start of the called function. */ + + int + aarch64_epilogue_uses (int regno) +@@ -10820,6 +11277,18 @@ aarch64_epilogue_uses (int regno) + if (regno == LR_REGNUM) + return 1; + } ++ if (regno == LOWERING_REGNUM && TARGET_ZA) ++ return 1; ++ if (regno == SME_STATE_REGNUM && TARGET_ZA) ++ return 1; ++ if (regno == TPIDR2_SETUP_REGNUM && TARGET_ZA) ++ return 1; ++ /* If the function shares SME state with its caller, ensure that that ++ data is not in the lazy save buffer on exit. */ ++ if (regno == ZA_SAVED_REGNUM && aarch64_cfun_incoming_pstate_za () != 0) ++ return 1; ++ if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0) ++ return 1; + return 0; + } + +@@ -11501,8 +11970,10 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + + /* There's no way to calculate VL-based values using relocations. */ + subrtx_iterator::array_type array; ++ HOST_WIDE_INT factor; + FOR_EACH_SUBRTX (iter, array, x, ALL) +- if (GET_CODE (*iter) == CONST_POLY_INT) ++ if (GET_CODE (*iter) == CONST_POLY_INT ++ || aarch64_sme_vq_unspec_p (x, &factor)) + return true; + + poly_int64 offset; +@@ -12364,6 +12835,72 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + return true; + } + ++/* Return a fresh memory reference to the current function's TPIDR2 block, ++ creating a block if necessary. */ ++ ++static rtx ++aarch64_get_tpidr2_block () ++{ ++ if (!cfun->machine->tpidr2_block) ++ /* The TPIDR2 block is 16 bytes in size and must be aligned to a 128-bit ++ boundary. */ ++ cfun->machine->tpidr2_block = assign_stack_local (V16QImode, 16, 128); ++ return copy_rtx (cfun->machine->tpidr2_block); ++} ++ ++/* Return a fresh register that points to the current function's ++ TPIDR2 block, creating a block if necessary. */ ++ ++static rtx ++aarch64_get_tpidr2_ptr () ++{ ++ rtx block = aarch64_get_tpidr2_block (); ++ return force_reg (Pmode, XEXP (block, 0)); ++} ++ ++/* Emit instructions to allocate a ZA lazy save buffer and initialize the ++ current function's TPIDR2 block. 
*/ ++ ++static void ++aarch64_init_tpidr2_block () ++{ ++ rtx block = aarch64_get_tpidr2_block (); ++ ++ /* The ZA save buffer is SVL.B*SVL.B bytes in size. */ ++ rtx svl_bytes = aarch64_sme_vq_immediate (Pmode, 16, AARCH64_ISA_MODE); ++ rtx svl_bytes_reg = force_reg (DImode, svl_bytes); ++ rtx za_size = expand_simple_binop (Pmode, MULT, svl_bytes_reg, ++ svl_bytes_reg, NULL, 0, OPTAB_LIB_WIDEN); ++ rtx za_save_buffer = allocate_dynamic_stack_space (za_size, 128, ++ BITS_PER_UNIT, -1, true); ++ za_save_buffer = force_reg (Pmode, za_save_buffer); ++ cfun->machine->za_save_buffer = za_save_buffer; ++ ++ /* The first word of the block points to the save buffer and the second ++ word is the number of ZA slices to save. */ ++ rtx block_0 = adjust_address (block, DImode, 0); ++ rtx block_8 = adjust_address (block, DImode, 8); ++ emit_insn (gen_store_pair_dw_didi (block_0, za_save_buffer, ++ block_8, svl_bytes_reg)); ++ ++ if (!memory_operand (block, V16QImode)) ++ block = replace_equiv_address (block, force_reg (Pmode, XEXP (block, 0))); ++ emit_insn (gen_aarch64_setup_local_tpidr2 (block)); ++} ++ ++/* Restore the contents of ZA from the lazy save buffer, given that ++ register TPIDR2_BLOCK points to the current function's TPIDR2 block. ++ PSTATE.ZA is known to be 0 and TPIDR2_EL0 is known to be null. */ ++ ++void ++aarch64_restore_za (rtx tpidr2_block) ++{ ++ emit_insn (gen_aarch64_smstart_za ()); ++ if (REGNO (tpidr2_block) != R0_REGNUM) ++ emit_move_insn (gen_rtx_REG (Pmode, R0_REGNUM), tpidr2_block); ++ emit_insn (gen_aarch64_tpidr2_restore ()); ++} ++ + /* Implement TARGET_START_CALL_ARGS. */ + + static void +@@ -12379,6 +12916,20 @@ aarch64_start_call_args (cumulative_args_t ca_v) + " option %<-march%>, or by using the %<target%>" + " attribute or pragma", "sme"); + } ++ ++ if ((ca->shared_za_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT)) ++ && !aarch64_cfun_has_state ("za")) ++ error ("call to a function that shares %qs state from a function" ++ " that has no %qs state", "za", "za"); ++ else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON)) ++ error ("call to a function that shares SME state from a function" ++ " that has no SME state"); ++ ++ /* If this is a call to a private ZA function, emit a marker to ++ indicate where any necessary set-up code could be inserted. ++ The code itself is inserted by the mode-switching pass. */ ++ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON)) ++ emit_insn (gen_aarch64_start_private_za_call ()); + } + + /* This function is used by the call expanders of the machine description. +@@ -12391,6 +12942,8 @@ aarch64_start_call_args (cumulative_args_t ca_v) + The second element is a PARALLEL that lists all the argument + registers that need to be saved and restored around a change + in PSTATE.SM, or const0_rtx if no such switch is needed. ++ The third element is a const_int that contains the sharing flags ++ for ZA. + SIBCALL indicates whether this function call is normal call or sibling call. + It will generate different pattern accordingly. 
*/ + +@@ -12403,10 +12956,12 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) + + rtx callee_abi = cookie; + rtx sme_mode_switch_args = const0_rtx; ++ unsigned int shared_za_flags = 0; + if (GET_CODE (cookie) == PARALLEL) + { + callee_abi = XVECEXP (cookie, 0, 0); + sme_mode_switch_args = XVECEXP (cookie, 0, 1); ++ shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2)); + } + + gcc_assert (CONST_INT_P (callee_abi)); +@@ -12426,6 +12981,41 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) + : !REG_P (callee)) + XEXP (mem, 0) = force_reg (mode, callee); + ++ /* Accumulate the return values, including state that is shared via ++ attributes. */ ++ auto_vec<rtx, 8> return_values; ++ if (result) ++ { ++ if (GET_CODE (result) == PARALLEL) ++ for (int i = 0; i < XVECLEN (result, 0); ++i) ++ return_values.safe_push (XVECEXP (result, 0, i)); ++ else ++ return_values.safe_push (result); ++ } ++ unsigned int orig_num_return_values = return_values.length (); ++ if (shared_za_flags & AARCH64_STATE_OUT) ++ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_REGNUM)); ++ /* When calling private-ZA functions from functions with ZA state, ++ we want to know whether the call committed a lazy save. */ ++ if (TARGET_ZA && !shared_za_flags) ++ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM)); ++ ++ /* Create the new return value, if necessary. */ ++ if (orig_num_return_values != return_values.length ()) ++ { ++ if (return_values.length () == 1) ++ result = return_values[0]; ++ else ++ { ++ for (rtx &x : return_values) ++ if (GET_CODE (x) != EXPR_LIST) ++ x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); ++ rtvec v = gen_rtvec_v (return_values.length (), ++ return_values.address ()); ++ result = gen_rtx_PARALLEL (VOIDmode, v); ++ } ++ } ++ + call = gen_rtx_CALL (VOIDmode, mem, const0_rtx); + + if (result != NULL_RTX) +@@ -12492,6 +13082,50 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) + + cfun->machine->call_switches_pstate_sm = true; + } ++ ++ /* Add any ZA-related information. ++ ZA_REGNUM represents the current function's ZA state, rather than ++ the contents of the ZA register itself. We ensure that the function's ++ ZA state is preserved by private-ZA call sequences, so the call itself ++ does not use or clobber ZA_REGNUM. */ ++ if (TARGET_ZA) ++ { ++ /* The callee requires ZA to be active if the callee is shared-ZA, ++ otherwise it requires ZA to be dormant or off. The state of ZA is ++ captured by a combination of SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ++ and ZA_SAVED_REGNUM. */ ++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (DImode, SME_STATE_REGNUM)); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (DImode, TPIDR2_SETUP_REGNUM)); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM)); ++ ++ /* Keep the aarch64_start/end_private_za_call markers live. */ ++ if (!(callee_isa_mode & AARCH64_FL_ZA_ON)) ++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (VNx16BImode, LOWERING_REGNUM)); ++ ++ /* If the callee is a shared-ZA function, record whether it uses the ++ current value of ZA. */ ++ if (shared_za_flags & AARCH64_STATE_IN) ++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (VNx16BImode, ZA_REGNUM)); ++ } ++} ++ ++/* Implement TARGET_END_CALL_ARGS. 
*/ ++ ++static void ++aarch64_end_call_args (cumulative_args_t ca_v) ++{ ++ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); ++ ++ /* If this is a call to a private ZA function, emit a marker to ++ indicate where any necessary restoration code could be inserted. ++ The code itself is inserted by the mode-switching pass. */ ++ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON)) ++ emit_insn (gen_aarch64_end_private_za_call ()); + } + + /* Emit call insn with PAT and do aarch64-specific handling. */ +@@ -13602,6 +14236,9 @@ aarch64_regno_regclass (unsigned regno) + if (regno == FFR_REGNUM || regno == FFRT_REGNUM) + return FFR_REGS; + ++ if (FAKE_REGNUM_P (regno)) ++ return FAKE_REGS; ++ + return NO_REGS; + } + +@@ -13957,12 +14594,14 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) + return (vec_flags & VEC_ADVSIMD + ? CEIL (lowest_size, UNITS_PER_VREG) + : CEIL (lowest_size, UNITS_PER_WORD)); ++ + case STACK_REG: + case PR_REGS: + case PR_LO_REGS: + case PR_HI_REGS: + case FFR_REGS: + case PR_AND_FFR_REGS: ++ case FAKE_REGS: + return 1; + + case NO_REGS: +@@ -19002,10 +19641,14 @@ aarch64_override_options_internal (struct gcc_options *opts) + && !fixed_regs[R18_REGNUM]) + error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); + +- if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) ++ if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON)) + && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) + { +- error ("streaming functions require the ISA extension %qs", "sme"); ++ if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) ++ error ("streaming functions require the ISA extension %qs", "sme"); ++ else ++ error ("functions with SME state require the ISA extension %qs", ++ "sme"); + inform (input_location, "you can enable %qs using the command-line" + " option %<-march%>, or by using the %<target%>" + " attribute or pragma", "sme"); +@@ -21341,6 +21984,8 @@ aarch64_conditional_register_usage (void) + CLEAR_HARD_REG_BIT (operand_reg_set, VG_REGNUM); + CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); + CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); ++ for (int i = FIRST_FAKE_REGNUM; i <= LAST_FAKE_REGNUM; ++i) ++ CLEAR_HARD_REG_BIT (operand_reg_set, i); + + /* When tracking speculation, we need a couple of call-clobbered registers + to track the speculation state. It would be nice to just use +@@ -22795,6 +23440,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) + || aarch64_sve_rdvl_immediate_p (x))) + return true; + ++ if (aarch64_rdsvl_immediate_p (x)) ++ return true; ++ + return aarch64_classify_symbolic_expression (x) + == SYMBOL_TINY_ABSOLUTE; + } +@@ -28266,9 +28914,45 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2) + return 0; + if (!check_attr ("arm", "streaming_compatible")) + return 0; ++ if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za") ++ != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za")) ++ return 0; + return 1; + } + ++/* Implement TARGET_MERGE_DECL_ATTRIBUTES. 
*/ ++ ++static tree ++aarch64_merge_decl_attributes (tree olddecl, tree newdecl) ++{ ++ tree old_attrs = DECL_ATTRIBUTES (olddecl); ++ tree old_new = lookup_attribute ("arm", "new", old_attrs); ++ ++ tree new_attrs = DECL_ATTRIBUTES (newdecl); ++ tree new_new = lookup_attribute ("arm", "new", new_attrs); ++ ++ if (DECL_INITIAL (olddecl) && new_new) ++ { ++ error ("cannot apply attribute %qs to %q+D after the function" ++ " has been defined", "new", newdecl); ++ inform (DECL_SOURCE_LOCATION (olddecl), "%q+D defined here", ++ newdecl); ++ } ++ else ++ { ++ if (old_new && new_new) ++ { ++ old_attrs = remove_attribute ("arm", "new", old_attrs); ++ TREE_VALUE (new_new) = chainon (TREE_VALUE (new_new), ++ TREE_VALUE (old_new)); ++ } ++ if (new_new) ++ aarch64_check_arm_new_against_type (TREE_VALUE (new_new), newdecl); ++ } ++ ++ return merge_attributes (old_attrs, new_attrs); ++} ++ + /* Implement TARGET_GET_MULTILIB_ABI_NAME */ + + static const char * +@@ -28634,6 +29318,629 @@ aarch64_indirect_call_asm (rtx addr) + return ""; + } + ++/* Implement OPTIMIZE_MODE_SWITCHING. */ ++ ++bool ++aarch64_optimize_mode_switching (aarch64_mode_entity entity) ++{ ++ bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0 ++ || (aarch64_cfun_has_new_state ("za") ++ && df_regs_ever_live_p (ZA_REGNUM))); ++ ++ if (have_sme_state && nonlocal_goto_handler_labels) ++ { ++ static bool reported; ++ if (!reported) ++ { ++ sorry ("non-local gotos in functions with SME state"); ++ reported = true; ++ } ++ } ++ ++ switch (entity) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return have_sme_state && !nonlocal_goto_handler_labels; ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EMIT for ZA_SAVE_BUFFER. */ ++ ++static void ++aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode mode, ++ aarch64_tristate_mode prev_mode) ++{ ++ if (mode == aarch64_tristate_mode::YES) ++ { ++ gcc_assert (prev_mode == aarch64_tristate_mode::NO); ++ aarch64_init_tpidr2_block (); ++ } ++ else ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EMIT for LOCAL_SME_STATE. */ ++ ++static void ++aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode, ++ aarch64_local_sme_state prev_mode) ++{ ++ /* Back-propagation should ensure that we're always starting from ++ a known mode. */ ++ gcc_assert (prev_mode != aarch64_local_sme_state::ANY); ++ ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) ++ { ++ /* Commit any uncommitted lazy save. This leaves ZA either active ++ and zero (lazy save case) or off (normal case). ++ ++ The sequence is: ++ ++ mrs <temp>, tpidr2_el0 ++ cbz <temp>, no_save ++ bl __arm_tpidr2_save ++ msr tpidr2_el0, xzr ++ zero { za } // Only if ZA is live ++ no_save: */ ++ bool is_active = (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || mode == aarch64_local_sme_state::ACTIVE_DEAD); ++ auto tmp_reg = gen_reg_rtx (DImode); ++ auto active_flag = gen_int_mode (is_active, DImode); ++ emit_insn (gen_aarch64_read_tpidr2 (tmp_reg)); ++ emit_insn (gen_aarch64_commit_lazy_save (tmp_reg, active_flag)); ++ } ++ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || mode == aarch64_local_sme_state::ACTIVE_DEAD) ++ { ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL) ++ { ++ /* Make ZA active after being inactive. ++ ++ First handle the case in which the lazy save we set up was ++ committed by a callee. If the function's source-level ZA state ++ is live then we must conditionally restore it from the lazy ++ save buffer. 
Otherwise we can just force PSTATE.ZA to 1. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE) ++ emit_insn (gen_aarch64_restore_za (aarch64_get_tpidr2_ptr ())); ++ else ++ emit_insn (gen_aarch64_smstart_za ()); ++ ++ /* Now handle the case in which the lazy save was not committed. ++ In that case, ZA still contains the current function's ZA state, ++ and we just need to cancel the lazy save. */ ++ emit_insn (gen_aarch64_clear_tpidr2 ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL) ++ { ++ /* Retrieve the current function's ZA state from the lazy save ++ buffer. */ ++ aarch64_restore_za (aarch64_get_tpidr2_ptr ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER ++ || prev_mode == aarch64_local_sme_state::OFF) ++ { ++ /* INACTIVE_CALLER means that we are enabling ZA for the first ++ time in this function. The code above means that ZA is either ++ active and zero (if we committed a lazy save) or off. Handle ++ the latter case by forcing ZA on. ++ ++ OFF means that PSTATE.ZA is guaranteed to be 0. We just need ++ to force it to 1. ++ ++ Both cases leave ZA zeroed. */ ++ emit_insn (gen_aarch64_smstart_za ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::ACTIVE_DEAD ++ || prev_mode == aarch64_local_sme_state::ACTIVE_LIVE) ++ /* A simple change in liveness, such as in a CFG structure where ++ ZA is only conditionally defined. No code is needed. */ ++ return; ++ ++ gcc_unreachable (); ++ } ++ ++ if (mode == aarch64_local_sme_state::INACTIVE_LOCAL) ++ { ++ if (prev_mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD ++ || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) ++ { ++ /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual ++ case of setting up a lazy save buffer before a call. ++ A transition from INACTIVE_CALLER is similar, except that ++ the contents of ZA are known to be zero. ++ ++ A transition from ACTIVE_DEAD means that ZA is live at the ++ point of the transition, but is dead on at least one incoming ++ edge. (That is, ZA is only conditionally initialized.) ++ For efficiency, we want to set up a lazy save even for ++ dead contents, since forcing ZA off would make later code ++ restore ZA from the lazy save buffer. */ ++ emit_insn (gen_aarch64_write_tpidr2 (aarch64_get_tpidr2_ptr ())); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL ++ || prev_mode == aarch64_local_sme_state::OFF) ++ /* We're simply discarding the information about which inactive ++ state applies. */ ++ return; ++ ++ gcc_unreachable (); ++ } ++ ++ if (mode == aarch64_local_sme_state::INACTIVE_CALLER ++ || mode == aarch64_local_sme_state::OFF) ++ { ++ /* The transition to INACTIVE_CALLER is used before returning from ++ new("za") functions. Any state in ZA belongs to the current ++ function rather than a caller, but that state is no longer ++ needed. Clear any pending lazy save and turn ZA off. ++ ++ The transition to OFF is used before calling a private-ZA function. ++ We committed any incoming lazy save above, so at this point any ++ contents in ZA belong to the current function. 
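++
++	 For example, the INACTIVE_LOCAL -> OFF case below emits:
++
++	   msr	tpidr2_el0, xzr
++	   smstop	za
++
++	 which cancels the function's own lazy save and turns ZA off.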
*/
++      if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL)
++	emit_insn (gen_aarch64_clear_tpidr2 ());
++
++      if (prev_mode != aarch64_local_sme_state::OFF
++	  && prev_mode != aarch64_local_sme_state::SAVED_LOCAL)
++	emit_insn (gen_aarch64_smstop_za ());
++
++      return;
++    }
++
++  if (mode == aarch64_local_sme_state::SAVED_LOCAL)
++    {
++      /* This is a transition to an exception handler.  */
++      gcc_assert (prev_mode == aarch64_local_sme_state::OFF
++		  || prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL);
++      return;
++    }
++
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_EMIT.  */
++
++static void
++aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
++{
++  if (mode == prev_mode)
++    return;
++
++  start_sequence ();
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode (mode),
++					aarch64_tristate_mode (prev_mode));
++      break;
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      aarch64_mode_emit_local_sme_state (aarch64_local_sme_state (mode),
++					 aarch64_local_sme_state (prev_mode));
++      break;
++    }
++  rtx_insn *seq = get_insns ();
++  end_sequence ();
++
++  /* Get the set of clobbered registers that are currently live.  */
++  HARD_REG_SET clobbers = {};
++  for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
++    {
++      vec_rtx_properties properties;
++      properties.add_insn (insn, false);
++      for (rtx_obj_reference ref : properties.refs ())
++	if (ref.is_write () && HARD_REGISTER_NUM_P (ref.regno))
++	  SET_HARD_REG_BIT (clobbers, ref.regno);
++    }
++  clobbers &= live;
++
++  /* Emit instructions to save clobbered registers to pseudos.  Queue
++     instructions to restore the registers afterwards.
++
++     This should only be needed in rare situations.  */
++  auto_vec<rtx, 33> after;
++  for (unsigned int regno = R0_REGNUM; regno < R30_REGNUM; ++regno)
++    if (TEST_HARD_REG_BIT (clobbers, regno))
++      {
++	rtx hard_reg = gen_rtx_REG (DImode, regno);
++	rtx pseudo_reg = gen_reg_rtx (DImode);
++	emit_move_insn (pseudo_reg, hard_reg);
++	after.quick_push (gen_move_insn (hard_reg, pseudo_reg));
++      }
++  if (TEST_HARD_REG_BIT (clobbers, CC_REGNUM))
++    {
++      rtx pseudo_reg = gen_reg_rtx (DImode);
++      emit_insn (gen_aarch64_save_nzcv (pseudo_reg));
++      after.quick_push (gen_aarch64_restore_nzcv (pseudo_reg));
++    }
++
++  /* Emit the transition instructions themselves.  */
++  emit_insn (seq);
++
++  /* Restore the clobbered registers.  */
++  for (auto *insn : after)
++    emit_insn (insn);
++}
++
++/* Return true if INSN references the SME state represented by hard register
++   REGNO.  */
++
++static bool
++aarch64_insn_references_sme_state_p (rtx_insn *insn, unsigned int regno)
++{
++  df_ref ref;
++  FOR_EACH_INSN_DEF (ref, insn)
++    if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
++	&& DF_REF_REGNO (ref) == regno)
++      return true;
++  FOR_EACH_INSN_USE (ref, insn)
++    if (DF_REF_REGNO (ref) == regno)
++      return true;
++  return false;
++}
++
++/* Implement TARGET_MODE_NEEDED for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live)
++{
++  if (!CALL_P (insn)
++      && find_reg_note (insn, REG_EH_REGION, NULL_RTX))
++    {
++      static bool reported;
++      if (!reported)
++	{
++	  sorry ("catching non-call exceptions in functions with SME state");
++	  reported = true;
++	}
++      /* Aim for graceful error recovery by picking the value that is
++	 least likely to generate an ICE.
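++	 INACTIVE_LOCAL is also one of the two previous states that the
++	 SAVED_LOCAL transition in aarch64_mode_emit_local_sme_state
++	 accepts, so picking it keeps any exception edges self-consistent.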
*/ ++ return aarch64_local_sme_state::INACTIVE_LOCAL; ++ } ++ ++ /* A non-local goto is equivalent to a return. We disallow non-local ++ receivers in functions with SME state, so we know that the target ++ expects ZA to be dormant or off. */ ++ if (JUMP_P (insn) ++ && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) ++ return aarch64_local_sme_state::INACTIVE_CALLER; ++ ++ /* start_private_za_call and end_private_za_call bracket a sequence ++ that calls a private-ZA function. Force ZA to be turned off if the ++ function doesn't have any live ZA state, otherwise require ZA to be ++ inactive. */ ++ auto icode = recog_memoized (insn); ++ if (icode == CODE_FOR_aarch64_start_private_za_call ++ || icode == CODE_FOR_aarch64_end_private_za_call) ++ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) ++ ? aarch64_local_sme_state::INACTIVE_LOCAL ++ : aarch64_local_sme_state::OFF); ++ ++ /* Force ZA to contain the current function's ZA state if INSN wants ++ to access it. */ ++ if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM)) ++ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) ++ ? aarch64_local_sme_state::ACTIVE_LIVE ++ : aarch64_local_sme_state::ACTIVE_DEAD); ++ ++ return aarch64_local_sme_state::ANY; ++} ++ ++/* Implement TARGET_MODE_NEEDED for ZA_SAVE_BUFFER. */ ++ ++static aarch64_tristate_mode ++aarch64_mode_needed_za_save_buffer (rtx_insn *insn, HARD_REG_SET live) ++{ ++ /* We need to set up a lazy save buffer no later than the first ++ transition to INACTIVE_LOCAL (which involves setting up a lazy save). */ ++ if (aarch64_mode_needed_local_sme_state (insn, live) ++ == aarch64_local_sme_state::INACTIVE_LOCAL) ++ return aarch64_tristate_mode::YES; ++ ++ /* Also make sure that the lazy save buffer is set up before the first ++ insn that throws internally. The exception handler will sometimes ++ load from it. */ ++ if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) ++ return aarch64_tristate_mode::YES; ++ ++ return aarch64_tristate_mode::MAYBE; ++} ++ ++/* Implement TARGET_MODE_NEEDED. */ ++ ++static int ++aarch64_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET live) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ return int (aarch64_mode_needed_za_save_buffer (insn, live)); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_mode_needed_local_sme_state (insn, live)); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_AFTER for LOCAL_SME_STATE. */ ++ ++static aarch64_local_sme_state ++aarch64_mode_after_local_sme_state (aarch64_local_sme_state mode, ++ HARD_REG_SET live) ++{ ++ /* Note places where ZA dies, so that we can try to avoid saving and ++ restoring state that isn't needed. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ && !TEST_HARD_REG_BIT (live, ZA_REGNUM)) ++ return aarch64_local_sme_state::ACTIVE_DEAD; ++ ++ /* Note where ZA is born, e.g. when moving past an __arm_out("za") ++ function. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_DEAD ++ && TEST_HARD_REG_BIT (live, ZA_REGNUM)) ++ return aarch64_local_sme_state::ACTIVE_LIVE; ++ ++ return mode; ++} ++ ++/* Implement TARGET_MODE_AFTER. 
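++
++   HAVE_ZA_SAVE_BUFFER is never changed by the instructions themselves:
++   buffer creation happens only at the points that TARGET_MODE_NEEDED
++   requests.  LOCAL_SME_STATE must instead track where ZA's contents
++   become live or dead; see aarch64_mode_after_local_sme_state above.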
*/
++
++static int
++aarch64_mode_after (int entity, int mode, rtx_insn *, HARD_REG_SET live)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return mode;
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_mode_after_local_sme_state
++		  (aarch64_local_sme_state (mode), live));
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_CONFLUENCE for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_local_sme_confluence (aarch64_local_sme_state mode1,
++			      aarch64_local_sme_state mode2)
++{
++  /* Perform a symmetrical check for two values.  */
++  auto is_pair = [&](aarch64_local_sme_state val1,
++		     aarch64_local_sme_state val2)
++    {
++      return ((mode1 == val1 && mode2 == val2)
++	      || (mode1 == val2 && mode2 == val1));
++    };
++
++  /* INACTIVE_CALLER means ZA is off or it has dormant contents belonging
++     to a caller.  OFF is one of the options.  */
++  if (is_pair (aarch64_local_sme_state::INACTIVE_CALLER,
++	       aarch64_local_sme_state::OFF))
++    return aarch64_local_sme_state::INACTIVE_CALLER;
++
++  /* Similarly for dormant contents belonging to the current function.  */
++  if (is_pair (aarch64_local_sme_state::INACTIVE_LOCAL,
++	       aarch64_local_sme_state::OFF))
++    return aarch64_local_sme_state::INACTIVE_LOCAL;
++
++  /* Treat a conditionally-initialized value as a fully-initialized value.  */
++  if (is_pair (aarch64_local_sme_state::ACTIVE_LIVE,
++	       aarch64_local_sme_state::ACTIVE_DEAD))
++    return aarch64_local_sme_state::ACTIVE_LIVE;
++
++  return aarch64_local_sme_state::ANY;
++}
++
++/* Implement TARGET_MODE_CONFLUENCE.  */
++
++static int
++aarch64_mode_confluence (int entity, int mode1, int mode2)
++{
++  gcc_assert (mode1 != mode2);
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return int (aarch64_tristate_mode::MAYBE);
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_local_sme_confluence
++		  (aarch64_local_sme_state (mode1),
++		   aarch64_local_sme_state (mode2)));
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_BACKPROP for an entity that either stays
++   NO throughout, or makes one transition from NO to YES.  */
++
++static aarch64_tristate_mode
++aarch64_one_shot_backprop (aarch64_tristate_mode mode1,
++			   aarch64_tristate_mode mode2)
++{
++  /* Keep bringing the transition forward until it starts from NO.  */
++  if (mode1 == aarch64_tristate_mode::MAYBE
++      && mode2 == aarch64_tristate_mode::YES)
++    return mode2;
++
++  return aarch64_tristate_mode::MAYBE;
++}
++
++/* Implement TARGET_MODE_BACKPROP for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_local_sme_backprop (aarch64_local_sme_state mode1,
++			    aarch64_local_sme_state mode2)
++{
++  /* We always need to know what the current state is when transitioning
++     to a new state.  Force any location with indeterminate starting state
++     to be active.  */
++  if (mode1 == aarch64_local_sme_state::ANY)
++    switch (mode2)
++      {
++      case aarch64_local_sme_state::INACTIVE_CALLER:
++      case aarch64_local_sme_state::OFF:
++      case aarch64_local_sme_state::ACTIVE_DEAD:
++	/* The current function's ZA state is not live.  */
++	return aarch64_local_sme_state::ACTIVE_DEAD;
++
++      case aarch64_local_sme_state::INACTIVE_LOCAL:
++      case aarch64_local_sme_state::ACTIVE_LIVE:
++	/* The current function's ZA state is live.  */
++	return aarch64_local_sme_state::ACTIVE_LIVE;
++
++      case aarch64_local_sme_state::SAVED_LOCAL:
++	/* This is a transition to an exception handler.
Since we don't ++ support non-call exceptions for SME functions, the source of ++ the transition must be known. We'll assert later if that's ++ not the case. */ ++ return aarch64_local_sme_state::ANY; ++ ++ case aarch64_local_sme_state::ANY: ++ return aarch64_local_sme_state::ANY; ++ } ++ ++ return aarch64_local_sme_state::ANY; ++} ++ ++/* Implement TARGET_MODE_BACKPROP. */ ++ ++static int ++aarch64_mode_backprop (int entity, int mode1, int mode2) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ return int (aarch64_one_shot_backprop (aarch64_tristate_mode (mode1), ++ aarch64_tristate_mode (mode2))); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_local_sme_backprop ++ (aarch64_local_sme_state (mode1), ++ aarch64_local_sme_state (mode2))); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_ENTRY. */ ++ ++static int ++aarch64_mode_entry (int entity) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ return int (aarch64_tristate_mode::NO); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_cfun_shared_flags ("za") != 0 ++ ? aarch64_local_sme_state::ACTIVE_LIVE ++ : aarch64_local_sme_state::INACTIVE_CALLER); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EXIT. */ ++ ++static int ++aarch64_mode_exit (int entity) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ return int (aarch64_tristate_mode::MAYBE); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_cfun_shared_flags ("za") != 0 ++ ? aarch64_local_sme_state::ACTIVE_LIVE ++ : aarch64_local_sme_state::INACTIVE_CALLER); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EH_HANDLER. */ ++ ++static int ++aarch64_mode_eh_handler (int entity) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ /* Require a lazy save buffer to be allocated before the first ++ insn that can throw. */ ++ return int (aarch64_tristate_mode::YES); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_local_sme_state::SAVED_LOCAL); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_PRIORITY. */ ++ ++static int ++aarch64_mode_priority (int, int n) ++{ ++ return n; ++} ++ ++/* Implement TARGET_MD_ASM_ADJUST. */ ++ ++static rtx_insn * ++aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, ++ vec<machine_mode> &input_modes, ++ vec<const char *> &constraints, ++ vec<rtx> &uses, vec<rtx> &clobbers, ++ HARD_REG_SET &clobbered_regs, location_t loc) ++{ ++ rtx_insn *seq = arm_md_asm_adjust (outputs, inputs, input_modes, constraints, ++ uses, clobbers, clobbered_regs, loc); ++ ++ /* "za" in the clobber list of a function with ZA state is defined to ++ mean that the asm can read from and write to ZA. We can model the ++ read using a USE, but unfortunately, it's not possible to model the ++ write directly. Use a separate insn to model the effect. ++ ++ We must ensure that ZA is active on entry, which is enforced by using ++ SME_STATE_REGNUM. The asm must ensure that ZA is active on return. 
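++
++     For example, an asm such as the one in the za_state_4.c tests:
++
++       asm ("incb\tx1" ::: "x1", "za");
++
++     keeps its "x1" clobber, while the "za" clobber is rewritten below
++     into uses of ZA_REGNUM and SME_STATE_REGNUM plus a separate
++     aarch64_asm_update_za instruction that models the write to ZA.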
*/ ++ if (TARGET_ZA) ++ for (unsigned int i = clobbers.length (); i-- > 0; ) ++ { ++ rtx x = clobbers[i]; ++ if (REG_P (x) && REGNO (x) == ZA_REGNUM) ++ { ++ auto id = cfun->machine->next_asm_update_za_id++; ++ ++ start_sequence (); ++ if (seq) ++ emit_insn (seq); ++ emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode))); ++ seq = get_insns (); ++ end_sequence (); ++ ++ uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM)); ++ uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM)); ++ ++ clobbers.ordered_remove (i); ++ CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM); ++ } ++ } ++ return seq; ++} ++ + /* If CALL involves a change in PSTATE.SM, emit the instructions needed + to switch to the new mode and the instructions needed to restore the + original mode. Return true if something changed. */ +@@ -29108,6 +30415,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_START_CALL_ARGS + #define TARGET_START_CALL_ARGS aarch64_start_call_args + ++#undef TARGET_END_CALL_ARGS ++#define TARGET_END_CALL_ARGS aarch64_end_call_args ++ + #undef TARGET_GIMPLE_FOLD_BUILTIN + #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +@@ -29473,6 +30783,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_COMP_TYPE_ATTRIBUTES + #define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes + ++#undef TARGET_MERGE_DECL_ATTRIBUTES ++#define TARGET_MERGE_DECL_ATTRIBUTES aarch64_merge_decl_attributes ++ + #undef TARGET_GET_MULTILIB_ABI_NAME + #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name + +@@ -29493,8 +30806,35 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_STRICT_ARGUMENT_NAMING + #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + ++#undef TARGET_MODE_EMIT ++#define TARGET_MODE_EMIT aarch64_mode_emit ++ ++#undef TARGET_MODE_NEEDED ++#define TARGET_MODE_NEEDED aarch64_mode_needed ++ ++#undef TARGET_MODE_AFTER ++#define TARGET_MODE_AFTER aarch64_mode_after ++ ++#undef TARGET_MODE_CONFLUENCE ++#define TARGET_MODE_CONFLUENCE aarch64_mode_confluence ++ ++#undef TARGET_MODE_BACKPROP ++#define TARGET_MODE_BACKPROP aarch64_mode_backprop ++ ++#undef TARGET_MODE_ENTRY ++#define TARGET_MODE_ENTRY aarch64_mode_entry ++ ++#undef TARGET_MODE_EXIT ++#define TARGET_MODE_EXIT aarch64_mode_exit ++ ++#undef TARGET_MODE_EH_HANDLER ++#define TARGET_MODE_EH_HANDLER aarch64_mode_eh_handler ++ ++#undef TARGET_MODE_PRIORITY ++#define TARGET_MODE_PRIORITY aarch64_mode_priority ++ + #undef TARGET_MD_ASM_ADJUST +-#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust ++#define TARGET_MD_ASM_ADJUST aarch64_md_asm_adjust + + #undef TARGET_ASM_FILE_END + #define TARGET_ASM_FILE_END aarch64_asm_file_end +@@ -29505,6 +30845,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_HAVE_SHADOW_CALL_STACK + #define TARGET_HAVE_SHADOW_CALL_STACK true + ++#undef TARGET_EXTRA_LIVE_ON_ENTRY ++#define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry ++ + #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL + #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 6bfe55968..89d30b9bf 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* Macros to test ISA flags. 
*/ + + #define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) ++#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON) + #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) + #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) +@@ -259,6 +260,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define TARGET_STREAMING_COMPATIBLE \ + ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) + ++/* PSTATE.ZA is enabled in the current function body. */ ++#define TARGET_ZA (AARCH64_ISA_ZA_ON) ++ + /* Crypto is an optional extension to AdvSIMD. */ + #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) + +@@ -445,7 +449,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ +- 1, 1 /* FFR and FFRT */ \ ++ 1, 1, /* FFR and FFRT */ \ ++ 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ + } + + /* X30 is marked as caller-saved which is in line with regular function call +@@ -455,7 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + true but not until function epilogues have been generated. This ensures + that X30 is available for use in leaf functions if needed. */ + +-#define CALL_USED_REGISTERS \ ++#define CALL_REALLY_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \ +@@ -468,7 +473,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ +- 1, 1 /* FFR and FFRT */ \ ++ 1, 1, /* FFR and FFRT */ \ ++ 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ + } + + #define REGISTER_NAMES \ +@@ -484,7 +490,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + "sfp", "ap", "cc", "vg", \ + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ +- "ffr", "ffrt" \ ++ "ffr", "ffrt", \ ++ "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ ++ "za_free", "za_saved", "za" \ + } + + /* Generate the register aliases for core register N */ +@@ -533,7 +541,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define FRAME_POINTER_REGNUM SFP_REGNUM + #define STACK_POINTER_REGNUM SP_REGNUM + #define ARG_POINTER_REGNUM AP_REGNUM +-#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1) ++#define FIRST_PSEUDO_REGISTER (LAST_FAKE_REGNUM + 1) + + /* The number of argument registers available for each class. */ + #define NUM_ARG_REGS 8 +@@ -657,6 +665,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + + #define FP_SIMD_SAVED_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM)) ++ ++#define FAKE_REGNUM_P(REGNO) \ ++ IN_RANGE (REGNO, FIRST_FAKE_REGNUM, LAST_FAKE_REGNUM) + + /* Register and constant classes. 
*/
+
+@@ -677,6 +688,7 @@ enum reg_class
+   PR_REGS,
+   FFR_REGS,
+   PR_AND_FFR_REGS,
++  FAKE_REGS,
+   ALL_REGS,
+   LIM_REG_CLASSES		/* Last */
+ };
+
+@@ -700,6 +712,7 @@ enum reg_class
+   "PR_REGS",					\
+   "FFR_REGS",					\
+   "PR_AND_FFR_REGS",				\
++  "FAKE_REGS",					\
+   "ALL_REGS"					\
+ }
+
+@@ -720,6 +733,7 @@ enum reg_class
+   { 0x00000000, 0x00000000, 0x000ffff0 },	/* PR_REGS */		\
+   { 0x00000000, 0x00000000, 0x00300000 },	/* FFR_REGS */		\
+   { 0x00000000, 0x00000000, 0x003ffff0 },	/* PR_AND_FFR_REGS */	\
++  { 0x00000000, 0x00000000, 0x1fc00000 },	/* FAKE_REGS */		\
+   { 0xffffffff, 0xffffffff, 0x000fffff }	/* ALL_REGS */		\
+ }
+
+@@ -920,6 +934,15 @@ typedef struct GTY (()) machine_function
+   bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
+   /* One entry for each general purpose register.  */
+   rtx call_via[SP_REGNUM];
++
++  /* A pseudo register that points to the function's TPIDR2 block, or null
++     if the function doesn't have a TPIDR2 block.  */
++  rtx tpidr2_block;
++
++  /* A pseudo register that points to the function's ZA save buffer,
++     or null if none.  */
++  rtx za_save_buffer;
++
+   bool label_is_assembled;
+
+   /* True if we've expanded at least one call to a function that changes
+@@ -927,6 +950,10 @@ typedef struct GTY (()) machine_function
+      guarantees that no such mode switch exists.  */
+   bool call_switches_pstate_sm;
+
++  /* Used to generate unique identifiers for each update to ZA by an
++     asm statement.  */
++  unsigned int next_asm_update_za_id;
++
+   /* A set of all decls that have been passed to a vld1 intrinsic in the
+      current function.  This is used to help guide the vector cost model.  */
+   hash_set<tree> *vector_load_decls;
+@@ -996,6 +1023,10 @@ typedef struct
+   bool silent_p;			/* True if we should act silently, rather than
+					   raise an error for invalid calls.  */
+
++  /* AARCH64_STATE_* flags that describe whether the function shares ZA
++     with its callers.  */
++  unsigned int shared_za_flags;
++
+   /* A list of registers that need to be saved and restored around a
+      change to PSTATE.SM.  An auto_vec would be more convenient, but those
+      can't be copied.  */
+@@ -1344,4 +1375,61 @@ extern poly_uint16 aarch64_sve_vg;
+      STACK_BOUNDARY / BITS_PER_UNIT)			\
+    : (crtl->outgoing_args_size + STACK_POINTER_OFFSET))
+
++#ifndef USED_FOR_TARGET
++
++/* Enumerates the mode-switching "entities" for AArch64.  */
++enum class aarch64_mode_entity : int
++{
++  /* An aarch64_tristate_mode that says whether we have created a local
++     save buffer for the current function's ZA state.  The only transition
++     is from NO to YES.  */
++  HAVE_ZA_SAVE_BUFFER,
++
++  /* An aarch64_local_sme_state that reflects the state of all data
++     controlled by PSTATE.ZA.  */
++  LOCAL_SME_STATE
++};
++
++/* Describes the state of all data controlled by PSTATE.ZA.  */
++enum class aarch64_local_sme_state : int
++{
++  /* ZA is in the off or dormant state.  If it is dormant, the contents
++     of ZA belong to a caller.  */
++  INACTIVE_CALLER,
++
++  /* ZA is in the off state: PSTATE.ZA is 0 and TPIDR2_EL0 is null.  */
++  OFF,
++
++  /* ZA is in the off or dormant state.  If it is dormant, the contents
++     of ZA belong to the current function.  */
++  INACTIVE_LOCAL,
++
++  /* ZA is in the off state and the current function's ZA contents are
++     stored in the lazy save buffer.  This is the state on entry to
++     exception handlers.  */
++  SAVED_LOCAL,
++
++  /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
++     The contents of ZA are live.  */
++  ACTIVE_LIVE,
++
++  /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
++     The contents of ZA are dead.  */
++  ACTIVE_DEAD,
++
++  /* ZA could be in multiple states.  */
++  ANY
++};
++
++enum class aarch64_tristate_mode : int { NO, YES, MAYBE };
++
++#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
++  aarch64_optimize_mode_switching (aarch64_mode_entity (ENTITY))
++
++#define NUM_MODES_FOR_MODE_SWITCHING \
++  { int (aarch64_tristate_mode::MAYBE), \
++    int (aarch64_local_sme_state::ANY) }
++
++#endif
++
+ #endif /* GCC_AARCH64_H */
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index bb867de74..05a7c6675 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -111,6 +111,56 @@
+     ;; "FFR token": a fake register used for representing the scheduling
+     ;; restrictions on FFR-related operations.
+     (FFRT_REGNUM	85)
++
++    ;; ----------------------------------------------------------------
++    ;; Fake registers
++    ;; ----------------------------------------------------------------
++    ;; These registers represent abstract things, rather than real
++    ;; architected registers.
++
++    ;; Sometimes we use placeholder instructions to mark where later
++    ;; ABI-related lowering is needed.  These placeholders read and
++    ;; write this register.  Instructions that depend on the lowering
++    ;; read the register.
++    (LOWERING_REGNUM 86)
++
++    ;; Represents the contents of the current function's TPIDR2 block,
++    ;; in abstract form.
++    (TPIDR2_BLOCK_REGNUM 87)
++
++    ;; Holds the value that the current function wants PSTATE.ZA to be.
++    ;; The actual value can sometimes vary, because it does not track
++    ;; changes to PSTATE.ZA that happen during a lazy save and restore.
++    ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
++    (SME_STATE_REGNUM 88)
++
++    ;; Instructions write to this register if they set TPIDR2_EL0 to a
++    ;; well-defined value.  Instructions read from the register if they
++    ;; depend on the result of such writes.
++    ;;
++    ;; The register does not model the architected TPIDR2_EL0, just the
++    ;; current function's management of it.
++    (TPIDR2_SETUP_REGNUM 89)
++
++    ;; Represents the property "has an incoming lazy save been committed?".
++    (ZA_FREE_REGNUM 90)
++
++    ;; Represents the property "are the current function's ZA contents
++    ;; stored in the lazy save buffer, rather than in ZA itself?".
++    (ZA_SAVED_REGNUM 91)
++
++    ;; Represents the contents of the current function's ZA state in
++    ;; abstract form.  At various times in the function, these contents
++    ;; might be stored in ZA itself, or in the function's lazy save buffer.
++    ;;
++    ;; The contents persist even when the architected ZA is off.  Private-ZA
++    ;; functions have no effect on its contents.
++    (ZA_REGNUM 92)
++    ;; ----------------------------------------------------------------
++    (FIRST_FAKE_REGNUM LOWERING_REGNUM)
++    (LAST_FAKE_REGNUM ZA_REGNUM)
++    ;; ----------------------------------------------------------------
++
+     ;; The pair of scratch registers used for stack probing with -fstack-check.
+     ;; Leave R9 alone as a possible choice for the static chain.
+     ;; Note that the use of these registers is mutually exclusive with the use
+@@ -303,7 +353,12 @@
+     UNSPEC_TAG_SPACE		; Translate address to MTE tag address space.
+     UNSPEC_LD1RO
+     UNSPEC_SALT_ADDR
++    UNSPEC_SAVE_NZCV
++    UNSPEC_RESTORE_NZCV
+     UNSPECV_PATCHABLE_AREA
++    ;; Wraps a constant integer that should be multiplied by the number
++    ;; of quadwords in an SME vector.
++    UNSPEC_SME_VQ
+ ])
+
+ (define_c_enum "unspecv" [
+@@ -379,7 +434,7 @@
+ ;; Q registers and is equivalent to "simd".
+ + (define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd +- simd nosimd sve fp16]) ++ simd nosimd sve fp16 sme]) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -423,7 +478,10 @@ + (match_test "TARGET_FP_F16INST")) + + (and (eq_attr "arch" "sve") +- (match_test "TARGET_SVE"))) ++ (match_test "TARGET_SVE")) ++ ++ (and (eq_attr "arch" "sme") ++ (match_test "TARGET_SME"))) + (const_string "yes") + (const_string "no"))) + +@@ -928,7 +986,7 @@ + (set_attr "sls_length" "retbr")] + ) + +-(define_insn "*cb<optab><mode>1" ++(define_insn "aarch64_cb<optab><mode>1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) +@@ -1291,6 +1349,7 @@ + /* The "mov_imm" type for CNT is just a placeholder. */ + [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); + [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); ++ [r , UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]); + [r , m ; load_4 , * , 4] ldr\t%w0, %1 + [w , m ; load_4 , fp , 4] ldr\t%s0, %1 + [m , r Z; store_4 , * , 4] str\t%w1, %0 +@@ -1326,6 +1385,7 @@ + /* The "mov_imm" type for CNT is just a placeholder. */ + [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); + [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); ++ [r, UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]); + [r, m ; load_8 , * , 4] ldr\t%x0, %1 + [w, m ; load_8 , fp , 4] ldr\t%d0, %1 + [m, r Z; store_8 , * , 4] str\t%x1, %0 +@@ -7733,6 +7793,21 @@ + [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))] + ) + ++(define_insn "aarch64_save_nzcv" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(reg:CC CC_REGNUM)] UNSPEC_SAVE_NZCV))] ++ "" ++ "mrs\t%0, nzcv" ++) ++ ++(define_insn "aarch64_restore_nzcv" ++ [(set (reg:CC CC_REGNUM) ++ (unspec:CC [(match_operand:DI 0 "register_operand" "r")] ++ UNSPEC_RESTORE_NZCV))] ++ "" ++ "msr\tnzcv, %0" ++) ++ + ;; AdvSIMD Stuff + (include "aarch64-simd.md") + +diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md +index 212a73416..88fb9a07c 100644 +--- a/gcc/config/aarch64/constraints.md ++++ b/gcc/config/aarch64/constraints.md +@@ -220,6 +220,12 @@ + (and (match_code "const_poly_int") + (match_test "aarch64_sve_rdvl_immediate_p (op)"))) + ++(define_constraint "UsR" ++ "@internal ++ A constraint that matches a value produced by RDSVL." ++ (and (match_code "const") ++ (match_test "aarch64_rdsvl_immediate_p (op)"))) ++ + (define_constraint "Usv" + "@internal + A constraint that matches a VG-based constant that can be loaded by +diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C +new file mode 100644 +index 000000000..a245546d8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C +@@ -0,0 +1,189 @@ ++// { dg-options "-O -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++void callee_inout() __arm_inout("za"); ++void callee_in() noexcept __arm_in("za"); ++void callee_out() noexcept __arm_out("za"); ++void callee_normal(); ++ ++/* ++** _Z5test1v: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** bl __cxa_begin_catch ++** bl __cxa_end_catch ++** mov w0, #?2 ++** ... ++*/ ++__arm_new("za") int ++test1 () ++{ ++ try ++ { ++ callee_inout(); ++ return 1; ++ } ++ catch (...) ++ { ++ return 2; ++ } ++} ++ ++/* ++** _Z5test2v: ++** ... 
++** bl __arm_tpidr2_save ++** ... ++** bl __cxa_begin_catch ++** smstart za ++** bl _Z10callee_outv ++** bl _Z9callee_inv ++** smstop za ++** bl __cxa_end_catch ++** mov w0, #?2 ++** ... ++*/ ++__arm_new("za") int ++test2 () ++{ ++ try ++ { ++ callee_inout(); ++ return 1; ++ } ++ catch (...) ++ { ++ callee_out(); ++ callee_in(); ++ return 2; ++ } ++} ++ ++/* ++** _Z5test3v: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** smstop za ++** ... ++** bl _Z13callee_normalv ++** ... ++** bl __cxa_begin_catch ++** smstart za ++** bl _Z10callee_outv ++** bl _Z9callee_inv ++** smstop za ++** bl __cxa_end_catch ++** mov w0, #?2 ++** ... ++*/ ++__arm_new("za") int ++test3 () ++{ ++ try ++ { ++ callee_normal(); ++ return 1; ++ } ++ catch (...) ++ { ++ callee_out(); ++ callee_in(); ++ return 2; ++ } ++} ++ ++__arm_new("za") int ++test4 () ++{ ++ try ++ { ++ // No lazy save set up because this is a shared-ZA function. ++ callee_inout(); ++ return 1; ++ } ++ catch (...) ++ { ++ callee_inout(); ++ return 2; ++ } ++} ++// { dg-final { scan-assembler {_Z5test4v:(?:(?!msr\ttpidr2_el0, x[0-9]+).)*\tret} } } ++ ++/* ++** _Z5test5v: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** smstart za ++** ... ++** bl _Z12callee_inoutv ++** add (x[0-9]+), [^\n]+ ++** msr tpidr2_el0, \1 ++** bl _Z13callee_normalv ++** msr tpidr2_el0, xzr ++** smstop za ++** ... ++** bl __cxa_begin_catch ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** msr tpidr2_el0, xzr ++** bl _Z12callee_inoutv ++** smstop za ++** bl __cxa_end_catch ++** mov w0, #?2 ++** ... ++*/ ++__arm_new("za") int ++test5 () ++{ ++ try ++ { ++ callee_inout(); ++ callee_normal(); ++ return 1; ++ } ++ catch (...) ++ { ++ callee_inout(); ++ return 2; ++ } ++} ++ ++/* ++** _Z5test6v: ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** bl _Z13callee_normalv ++** msr tpidr2_el0, xzr ++** ... ++** bl __cxa_begin_catch ++** bl __cxa_end_catch ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** msr tpidr2_el0, xzr ++** ... ++*/ ++int ++test6 () __arm_inout("za") ++{ ++ try ++ { ++ callee_normal(); ++ callee_out(); ++ return 1; ++ } ++ catch (...) 
++ { ++ return 2; ++ } ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +index 032485adf..8b0755014 100644 +--- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C ++++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +@@ -2,3 +2,8 @@ + + void f1 () __arm_streaming; + void f2 () __arm_streaming_compatible; ++void f3 () __arm_in("za"); ++void f4 () __arm_out("za"); ++void f5 () __arm_inout("za"); ++void f6 () __arm_preserves("za"); ++__arm_new("za") void f7 () {} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c +index 8f1b83676..fcabe3edc 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c +@@ -2,3 +2,8 @@ + + void f1 () __arm_streaming; + void f2 () __arm_streaming_compatible; ++void f3 () __arm_in("za"); ++void f4 () __arm_out("za"); ++void f5 () __arm_inout("za"); ++void f6 () __arm_preserves("za"); ++__arm_new("za") void f7 () {} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c +new file mode 100644 +index 000000000..856880e21 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c +@@ -0,0 +1,154 @@ ++// { dg-options "" } ++ ++void shared_a () [[arm::inout("za")]]; ++void shared_a (); // { dg-error "conflicting types" } ++ ++void shared_b (); ++void shared_b () [[arm::inout("za")]]; // { dg-error "conflicting types" } ++ ++void shared_c () [[arm::inout("za")]]; ++void shared_c () {} // Inherits attribute from declaration (confusingly). ++ ++void shared_d (); ++void shared_d () [[arm::inout("za")]] {} // { dg-error "conflicting types" } ++ ++void shared_e () [[arm::inout("za")]] {} ++void shared_e (); // { dg-error "conflicting types" } ++ ++void shared_f () {} ++void shared_f () [[arm::inout("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*shared_g) (); ++extern void (*shared_g) () [[arm::inout("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*shared_h) () [[arm::inout("za")]]; ++extern void (*shared_h) (); // { dg-error "conflicting types" } ++ ++//---------------------------------------------------------------------------- ++ ++void preserved_a () [[arm::preserves("za")]]; ++void preserved_a (); // { dg-error "conflicting types" } ++ ++void preserved_b (); ++void preserved_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++void preserved_c () [[arm::preserves("za")]]; ++void preserved_c () {} // Inherits attribute from declaration (confusingly). 
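++// (The definition has no attribute of its own, but merging with the
++// earlier declaration's type still gives it arm::preserves("za").)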
++ ++void preserved_d (); ++void preserved_d () [[arm::preserves("za")]] {} // { dg-error "conflicting types" } ++ ++void preserved_e () [[arm::preserves("za")]] {} ++void preserved_e (); // { dg-error "conflicting types" } ++ ++void preserved_f () {} ++void preserved_f () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*preserved_g) (); ++extern void (*preserved_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*preserved_h) () [[arm::preserves("za")]]; ++extern void (*preserved_h) (); // { dg-error "conflicting types" } ++ ++//---------------------------------------------------------------------------- ++ ++void replicated_1 () [[arm::in("za", "za"), arm::in("za")]]; ++void replicated_2 () [[arm::out("za", "za"), arm::out("za")]]; ++void replicated_3 () [[arm::inout("za", "za"), arm::inout("za")]]; ++void replicated_4 () [[arm::preserves("za", "za"), arm::preserves("za")]]; ++ ++//---------------------------------------------------------------------------- ++ ++void invalid_1 () [[arm::in]]; // { dg-error "wrong number of arguments" } ++void invalid_2 () [[arm::in()]]; // { dg-error "parentheses must be omitted" } ++ // { dg-error "wrong number of arguments" "" { target *-*-* } .-1 } ++void invalid_3 () [[arm::in("")]]; // { dg-error "unrecognized state string ''" } ++void invalid_4 () [[arm::in("foo")]]; // { dg-error "unrecognized state string 'foo'" } ++void invalid_5 () [[arm::in(42)]]; // { dg-error "the arguments to 'in' must be constant strings" } ++void invalid_6 () [[arm::in(*(int *)0 ? "za" : "za")]]; // { dg-error "the arguments to 'in' must be constant strings" } ++ ++//---------------------------------------------------------------------------- ++ ++void mixed_a () [[arm::preserves("za")]]; ++void mixed_a () [[arm::inout("za")]]; // { dg-error "conflicting types" } ++ ++void mixed_b () [[arm::inout("za")]]; ++void mixed_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++void mixed_c () [[arm::preserves("za")]]; ++void mixed_c () [[arm::in("za")]] {} // { dg-error "conflicting types" } ++ ++void mixed_d () [[arm::inout("za")]]; ++void mixed_d () [[arm::in("za")]] {} // { dg-error "conflicting types" } ++ ++void mixed_e () [[arm::out("za")]] {} ++void mixed_e () [[arm::in("za")]]; // { dg-error "conflicting types" } ++ ++void mixed_f () [[arm::inout("za")]] {} ++void mixed_f () [[arm::out("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*mixed_g) () [[arm::in("za")]]; ++extern void (*mixed_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++extern void (*mixed_h) () [[arm::preserves("za")]]; ++extern void (*mixed_h) () [[arm::out("za")]]; // { dg-error "conflicting types" } ++ ++//---------------------------------------------------------------------------- ++ ++void contradiction_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++void contradiction_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++ ++int [[arm::inout("za")]] int_attr; // { dg-warning "only applies to function types" } ++void *[[arm::preserves("za")]] ptr_attr; // { dg-warning "only applies to function types" } ++ ++typedef void preserved_callback () [[arm::preserves("za")]]; ++typedef void shared_callback () [[arm::inout("za")]]; ++ ++void (*preserved_callback_ptr) () [[arm::preserves("za")]]; ++void (*shared_callback_ptr) () [[arm::inout("za")]]; ++ ++typedef void 
contradiction_callback_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++typedef void contradiction_callback_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++ ++void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++ ++struct s { ++ void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++ void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } ++}; ++ ++//---------------------------------------------------------------------------- ++ ++void keyword_ok_1 () __arm_inout("za"); ++void keyword_ok_1 () __arm_inout("za"); ++ ++void keyword_ok_2 () __arm_in("za"); ++void keyword_ok_2 () [[arm::in("za")]]; ++ ++void keyword_ok_3 () [[arm::out("za")]]; ++void keyword_ok_3 () __arm_out("za"); ++ ++void keyword_ok_4 () __arm_inout("za") [[arm::inout("za")]]; ++ ++void keyword_ok_5 () __arm_preserves("za"); ++void keyword_ok_5 () [[arm::preserves("za")]]; ++ ++__arm_new("za") void keyword_ok_6 () {} ++ ++//---------------------------------------------------------------------------- ++ ++void keyword_conflict_1 () __arm_inout("za"); ++void keyword_conflict_1 (); // { dg-error "conflicting types" } ++ ++void keyword_conflict_2 (); ++void keyword_conflict_2 () __arm_inout("za"); // { dg-error "conflicting types" } ++ ++void keyword_conflict_3 () __arm_inout("za"); ++void keyword_conflict_3 () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++void keyword_conflict_4 () [[arm::preserves("za")]]; ++void keyword_conflict_4 () __arm_inout("za"); // { dg-error "conflicting types" } ++ ++__arm_new("za") void keyword_conflict_5 () __arm_inout("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++__arm_new("za") void keyword_conflict_6 () __arm_preserves("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c +new file mode 100644 +index 000000000..572ff309f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c +@@ -0,0 +1,73 @@ ++// { dg-options "" } ++ ++[[arm::new("za")]] void new_za_a (); ++void new_za_a (); ++ ++void new_za_b (); ++[[arm::new("za")]] void new_za_b (); ++ ++[[arm::new("za")]] void new_za_c (); ++void new_za_c () {} ++ ++void new_za_d (); ++[[arm::new("za")]] void new_za_d () {} ++ ++[[arm::new("za")]] void new_za_e () {} ++void new_za_e (); ++ ++void new_za_f () {} ++[[arm::new("za")]] void new_za_f (); // { dg-error "cannot apply attribute 'new' to 'new_za_f' after the function has been defined" } ++ ++//---------------------------------------------------------------------------- ++ ++[[arm::new("za")]] void shared_a (); ++void shared_a () [[arm::inout("za")]]; // { dg-error "conflicting types" } ++ ++void shared_b () [[arm::inout("za")]]; ++[[arm::new("za")]] void shared_b (); // { dg-error "conflicting types" } ++ ++[[arm::new("za")]] void shared_c (); ++void shared_c () [[arm::in("za")]] {} // { dg-error "conflicting types" } ++ ++void shared_d () 
[[arm::in("za")]]; ++[[arm::new("za")]] void shared_d () {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++ ++[[arm::new("za")]] void shared_e () {} ++void shared_e () [[arm::out("za")]]; // { dg-error "conflicting types" } ++ ++void shared_f () [[arm::out("za")]] {} ++[[arm::new("za")]] void shared_f (); // { dg-error "conflicting types" } ++ ++[[arm::new("za")]] void shared_g () {} ++void shared_g () [[arm::preserves("za")]]; // { dg-error "conflicting types" } ++ ++void shared_h () [[arm::preserves("za")]] {} ++[[arm::new("za")]] void shared_h (); // { dg-error "conflicting types" } ++ ++//---------------------------------------------------------------------------- ++ ++[[arm::new("za")]] void contradiction_1 () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++void contradiction_2 [[arm::new("za")]] () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++[[arm::new("za")]] void contradiction_3 () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++void contradiction_4 [[arm::new("za")]] () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++ ++int [[arm::new("za")]] int_attr; // { dg-warning "does not apply to types" } ++[[arm::new("za")]] int int_var_attr; // { dg-error "applies only to function definitions" } ++typedef void new_za_callback () [[arm::new("za")]]; // { dg-warning "does not apply to types" } ++[[arm::new("za")]] void (*new_za_var_callback) (); // { dg-error "applies only to function definitions" } ++ ++//---------------------------------------------------------------------------- ++ ++[[arm::new("za")]] void complementary_1 () [[arm::streaming]] {} ++void complementary_2 [[arm::new("za")]] () [[arm::streaming]] {} ++[[arm::new("za")]] void complementary_3 () [[arm::streaming_compatible]] {} ++void complementary_4 [[arm::new("za")]] () [[arm::streaming_compatible]] {} ++ ++//---------------------------------------------------------------------------- ++ ++#pragma GCC target "+nosme" ++ ++[[arm::new("za")]] void bereft_1 (); ++[[arm::new("za")]] void bereft_2 () {} // { dg-error "functions with SME state require the ISA extension 'sme'" } ++void bereft_3 () [[arm::inout("za")]]; ++void bereft_4 () [[arm::inout("za")]] {} // { dg-error "functions with SME state require the ISA extension 'sme'" } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c +new file mode 100644 +index 000000000..203f6ae8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c +@@ -0,0 +1,31 @@ ++// { dg-options "" } ++ ++void normal_callee (); ++void in_callee () [[arm::in("za")]]; ++void out_callee () [[arm::out("za")]]; ++void inout_callee () [[arm::inout("za")]]; ++void preserves_callee () [[arm::preserves("za")]]; ++ ++struct callbacks { ++ void (*normal_ptr) (); ++ void (*in_ptr) () [[arm::in("za")]]; ++ void (*out_ptr) () [[arm::out("za")]]; ++ void (*inout_ptr) () [[arm::inout("za")]]; ++ void (*preserves_ptr) () [[arm::preserves("za")]]; ++}; ++ ++void ++normal_caller (struct callbacks *c) ++{ ++ normal_callee (); ++ in_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ out_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ 
inout_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ preserves_callee (); // { dg-error {call to a function that shares SME state from a function that has no SME state} } ++ ++ c->normal_ptr (); ++ c->in_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ c->out_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ c->inout_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ c->preserves_ptr (); // { dg-error {call to a function that shares SME state from a function that has no SME state} } ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +new file mode 100644 +index 000000000..cec0abf0e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +@@ -0,0 +1,585 @@ ++// { dg-options "-O -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++void private_za(); ++void out_za() __arm_out("za"); ++void in_za() __arm_in("za"); ++void inout_za() __arm_inout("za"); ++void preserves_za() __arm_preserves("za"); ++ ++/* ++** test1: ++** ret ++*/ ++__arm_new("za") void test1() ++{ ++} ++ ++/* ++** test2: ++** ldr w0, \[x0\] ++** ret ++*/ ++__arm_new("za") int test2(int *ptr) ++{ ++ return *ptr; ++} ++ ++/* ++** test3: ++** stp [^\n]+ ++** mov x29, sp ++** bl private_za ++** ( ++** mov w0, 0 ++** ldp [^\n]+ ++** | ++** ldp [^\n]+ ++** mov w0, 0 ++** ) ++** ret ++*/ ++__arm_new("za") int test3() ++{ ++ private_za(); ++ return 0; ++} ++ ++/* ++** test4: ++** ... ++** mrs x0, tpidr2_el0 ++** cbz x0, [^\n]+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** zero { za } ++** smstart za ++** bl in_za ++** smstop za ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test4() ++{ ++ in_za(); // Uses zeroed contents. ++} ++ ++/* ++** test5: ++** ... ++** mrs x0, tpidr2_el0 ++** cbz x0, [^\n]+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** smstop za ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test5() ++{ ++ private_za(); ++ out_za(); ++ in_za(); ++ private_za(); ++} ++ ++// Despite the long test, there shouldn't be too much scope for variation ++// here. The point is both to test correctness and code quality. ++/* ++** test6: ++** stp [^\n]+ ++** mov x29, sp ++** mrs x0, tpidr2_el0 ++** cbz x0, [^\n]+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** smstart za ++** bl out_za ++** rdsvl (x[0-9]+), #1 ++** mul (x[0-9]+), \1, \1 ++** sub sp, sp, \2 ++** mov (x[0-9]+), sp ++** stp \3, \1, \[x29, #?16\] ++** add (x[0-9]+), x29, #?16 ++** msr tpidr2_el0, \4 ++** bl private_za ++** ( ++** add (x[0-9]+), x29, #?16 ++** mrs (x[0-9]+), tpidr2_el0 ++** cbnz \6, [^\n]+ ++** smstart za ++** mov x0, \5 ++** | ++** add x0, x29, #?16 ++** mrs (x[0-9]+), tpidr2_el0 ++** cbnz \6, [^\n]+ ++** smstart za ++** ) ++** bl __arm_tpidr2_restore ++** msr tpidr2_el0, xzr ++** bl in_za ++** smstop za ++** mov sp, x29 ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test6() ++{ ++ out_za(); ++ private_za(); ++ in_za(); ++} ++ ++// Rely on previous tests for the part leading up to the smstart. ++/* ++** test7: ++** ... 
++** smstart za ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** smstop za ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test7() ++{ ++ out_za(); ++ in_za(); ++ private_za(); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test8: ++** ... ++** smstart za ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test8() ++{ ++ out_za(); ++ in_za(); ++ private_za(); ++ out_za(); ++ in_za(); ++ private_za(); ++} ++ ++/* ++** test9: ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** bl private_za ++** bl private_za ++** bl private_za ++** bl private_za ++** add x[0-9]+, x29, #?16 ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++*/ ++__arm_new("za") void test9() ++{ ++ out_za(); ++ private_za(); ++ private_za(); ++ private_za(); ++ private_za(); ++ in_za(); ++} ++ ++/* ++** test10: ++** ldr (w[0-9]+), \[x0\] ++** cbz \1, [^\n]+ ++** ldr [^\n]+ ++** add [^\n]+ ++** str [^\n]+ ++** ret ++** ... ++*/ ++__arm_new("za") void test10(volatile int *ptr) ++{ ++ if (__builtin_expect (*ptr != 0, 1)) ++ *ptr = *ptr + 1; ++ else ++ inout_za(); ++} ++ ++/* ++** test11: ++** ... ++** ldr w[0-9]+, [^\n]+ ++** add (w[0-9]+), [^\n]+ ++** str \1, [^\n]+ ++** ... ++** ret ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** bl inout_za ++** ldr (w[0-9]+), [^\n]+ ++** cbnz \2, [^\n]+ ++** smstop za ++** ... ++*/ ++__arm_new("za") void test11(volatile int *ptr) ++{ ++ if (__builtin_expect (*ptr == 0, 0)) ++ do ++ inout_za(); ++ while (*ptr); ++ else ++ *ptr += 1; ++} ++ ++__arm_new("za") void test12(volatile int *ptr) ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test13: ++** stp [^\n]+ ++** ... ++** stp [^\n]+ ++** ... ++** bl __arm_tpidr2_save ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** cbnz [^\n]+ ++** smstart za ++** msr tpidr2_el0, xzr ++** bl out_za ++** bl in_za ++** ... ++** smstop za ++** ... ++*/ ++__arm_new("za") void test13(volatile int *ptr) ++{ ++ do ++ { ++ private_za(); ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test14: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** smstart za ++** bl inout_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** bl out_za ++** bl in_za ++** smstop za ++** ... ++*/ ++__arm_new("za") void test14(volatile int *ptr) ++{ ++ do ++ inout_za(); ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test15: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** smstart za ++** bl out_za ++** bl in_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** smstop za ++** bl private_za ++** ldr [^\n]+ ++** ldp [^\n]+ ++** ret ++*/ ++__arm_new("za") void test15(volatile int *ptr) ++{ ++ do ++ { ++ out_za(); ++ in_za(); ++ } ++ while (*ptr); ++ private_za(); ++} ++ ++/* ++** test16: ++** ... ++** bl __arm_tpidr2_save ++** ... ++** smstart za ++** b [^\n]+ ++-- loop: ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** msr tpidr2_el0, xzr ++-- loop_entry: ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** bl private_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** msr tpidr2_el0, xzr ++** smstop za ++** bl private_za ++** ... 
++*/ ++__arm_new("za") void test16(volatile int *ptr) ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ private_za(); ++} ++ ++/* ++** test17: ++** ... ++** bl private_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** ... ++** msr tpidr2_el0, xzr ++** ... ++** smstop za ++** ... ++*/ ++__arm_new("za") void test17(volatile int *ptr) ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++} ++ ++/* ++** test18: ++** ldr w[0-9]+, [^\n]+ ++** cbnz w[0-9]+, [^\n]+ ++** ret ++** ... ++** smstop za ++** bl private_za ++** ... ++*/ ++__arm_new("za") void test18(volatile int *ptr) ++{ ++ if (__builtin_expect (*ptr, 0)) ++ { ++ out_za(); ++ in_za(); ++ private_za(); ++ } ++} ++ ++/* ++** test19: ++** ... ++** ldr w[0-9]+, [^\n]+ ++** cbz w[0-9]+, [^\n]+ ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstop za ++** bl private_za ++** ... ++*/ ++__arm_new("za") void test19(volatile int *ptr) ++{ ++ if (__builtin_expect (*ptr != 0, 1)) ++ private_za(); ++ else ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++} ++ ++/* ++** test20: ++** ... ++** bl a20 ++** (?:(?!x0).)* ++** bl b20 ++** ... ++** mov ([wx][0-9]+), [wx]0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** mov [wx]0, \1 ++** ... ++** bl c20 ++** ... ++*/ ++__arm_new("za") void test20() ++{ ++ extern int a20() __arm_inout("za"); ++ extern int b20(int); ++ extern void c20(int) __arm_inout("za"); ++ c20(b20(a20())); ++} ++ ++/* ++** test21: ++** ... ++** bl a21 ++** (?:(?!x0).)* ++** bl b21 ++** ... ++** mov (x[0-9]+), x0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** mov x0, \1 ++** ... ++** bl c21 ++** ... ++*/ ++__arm_new("za") void test21() ++{ ++ extern __UINT64_TYPE__ a21() __arm_inout("za"); ++ extern __UINT64_TYPE__ b21(__UINT64_TYPE__); ++ extern void c21(__UINT64_TYPE__) __arm_inout("za"); ++ c21(b21(a21())); ++} ++ ++/* ++** test22: ++** (?:(?!rdsvl).)* ++** rdsvl x[0-9]+, #1 ++** (?:(?!rdsvl).)* ++*/ ++__arm_new("za") void test22(volatile int *ptr) ++{ ++ inout_za(); ++ if (*ptr) ++ *ptr += 1; ++ else ++ private_za(); ++ private_za(); ++ in_za(); ++} ++ ++/* ++** test23: ++** (?:(?!__arm_tpidr2_save).)* ++** bl __arm_tpidr2_save ++** (?:(?!__arm_tpidr2_save).)* ++*/ ++__arm_new("za") void test23(volatile int *ptr) ++{ ++ if (*ptr) ++ *ptr += 1; ++ else ++ inout_za(); ++ inout_za(); ++} ++ ++/* ++** test24: ++** ... ++** bl in_za ++** ... ++** incb x1 ++** ... ++** bl out_za ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** incb x1 ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** incb x1 ++** ... ++** smstop za ++** ... ++** bl private_za ++** ... 
++** ret ++*/ ++__arm_new("za") void test24() ++{ ++ in_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ out_za(); ++ inout_za(); ++ private_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ private_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ in_za(); ++ private_za(); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c +new file mode 100644 +index 000000000..d54840d3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c +@@ -0,0 +1,595 @@ ++// { dg-options "-O2 -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++void private_za(); ++void out_za() __arm_out("za"); ++void in_za() __arm_in("za"); ++void inout_za() __arm_inout("za"); ++void preserves_za() __arm_preserves("za"); ++ ++/* ++** test1: ++** ret ++*/ ++void test1() __arm_inout("za") ++{ ++} ++ ++/* ++** test2: ++** ldr w0, \[x0\] ++** ret ++*/ ++int test2(int *ptr) __arm_inout("za") ++{ ++ return *ptr; ++} ++ ++/* ++** test3: ++** ... ++** sub sp, sp, x[0-9]+ ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++*/ ++int test3() __arm_inout("za") ++{ ++ private_za(); ++ return 0; ++} ++ ++/* ++** test4: ++** stp [^\n]+ ++** [^\n]+ ++** bl in_za ++** ldp [^\n]+ ++** ret ++*/ ++void test4() __arm_inout("za") ++{ ++ in_za(); ++} ++ ++/* ++** test5: ++** ... ++** smstop za ++** ... ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** ... ++** sub sp, sp, x[0-9]+ ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++*/ ++void test5() __arm_inout("za") ++{ ++ private_za(); ++ out_za(); ++ in_za(); ++ private_za(); ++} ++ ++/* ++** test6: ++** ... ++** bl out_za ++** ... ++** sub sp, sp, x[0-9]+ ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++** bl in_za ++** ... ++*/ ++void test6() __arm_inout("za") ++{ ++ out_za(); ++ private_za(); ++ in_za(); ++} ++ ++/* ++** test7: ++** stp [^\n]+ ++** [^\n]+ ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** ldp [^\n]+ ++** ret ++*/ ++void test7() __arm_inout("za") ++{ ++ out_za(); ++ in_za(); ++ private_za(); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test8: ++** stp [^\n]+ ++** [^\n]+ ++** bl out_za ++** bl in_za ++** smstop za ++** bl private_za ++** smstart za ++** bl out_za ++** bl in_za ++** ... ++** sub sp, sp, x[0-9]+ ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++** ret ++*/ ++void test8() __arm_inout("za") ++{ ++ out_za(); ++ in_za(); ++ private_za(); ++ out_za(); ++ in_za(); ++ private_za(); ++} ++ ++/* ++** test9: ++** stp [^\n]+ ++** [^\n]+ ++** bl out_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** bl private_za ++** bl private_za ++** bl private_za ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... 
++*/ ++void test9() __arm_inout("za") ++{ ++ out_za(); ++ private_za(); ++ private_za(); ++ private_za(); ++ private_za(); ++ in_za(); ++} ++ ++/* ++** test10: ++** ldr (w[0-9]+), \[x0\] ++** cbz \1, [^\n]+ ++** ldr [^\n]+ ++** add [^\n]+ ++** str [^\n]+ ++** ret ++** ... ++*/ ++void test10(volatile int *ptr) __arm_inout("za") ++{ ++ if (__builtin_expect (*ptr != 0, 1)) ++ *ptr = *ptr + 1; ++ else ++ inout_za(); ++} ++ ++/* ++** test11: ++** (?!.*(\t__arm|\tza|tpidr2_el0)).* ++*/ ++void test11(volatile int *ptr) __arm_inout("za") ++{ ++ if (__builtin_expect (*ptr == 0, 0)) ++ do ++ inout_za(); ++ while (*ptr); ++ else ++ *ptr += 1; ++} ++ ++void test12(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test13: ++** stp [^\n]+ ++** ... ++** stp [^\n]+ ++** ... ++-- loop: ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** smstart za ++** msr tpidr2_el0, xzr ++** bl out_za ++** bl in_za ++** [^\n]+ ++** [^\n]+ ++** ldp [^\n]+ ++** ret ++*/ ++void test13(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ { ++ private_za(); ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test14: ++** ... ++** bl inout_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** bl out_za ++** bl in_za ++** ... ++*/ ++void test14(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ inout_za(); ++ while (*ptr); ++ out_za(); ++ in_za(); ++} ++ ++/* ++** test15: ++** ... ++** bl out_za ++** bl in_za ++** ldr [^\n]+ ++** cbnz [^\n]+ ++** ... ++** stp [^\n]+ ++** ... ++** msr tpidr2_el0, [^\n]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++*/ ++void test15(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ { ++ out_za(); ++ in_za(); ++ } ++ while (*ptr); ++ private_za(); ++} ++ ++/* ++** test16: ++** stp [^\n]+ ++** ... ++** stp [^\n]+ ++** ... ++** b [^\n]+ ++-- loop: ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** msr tpidr2_el0, xzr ++-- loop_entry: ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++*/ ++void test16(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++ private_za(); ++} ++ ++/* ++** test17: ++** ... ++-- loop: ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** smstart za ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... ++** cbnz [^\n]+ ++** [^\n]+ ++** [^\n]+ ++** ldp [^\n]+ ++** ret ++*/ ++void test17(volatile int *ptr) __arm_inout("za") ++{ ++ do ++ { ++ inout_za(); ++ private_za(); ++ while (*ptr) ++ ptr += 1; ++ } ++ while (*ptr); ++} ++ ++/* ++** test18: ++** ldr w[0-9]+, [^\n]+ ++** cbnz w[0-9]+, [^\n]+ ++** ret ++** ... ++** bl out_za ++** bl in_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** msr tpidr2_el0, xzr ++** ... 
++*/ ++void test18(volatile int *ptr) __arm_inout("za") ++{ ++ if (__builtin_expect (*ptr, 0)) ++ { ++ out_za(); ++ in_za(); ++ private_za(); ++ } ++} ++ ++void test19(volatile int *ptr) __arm_inout("za") ++{ ++ if (__builtin_expect (*ptr != 0, 1)) ++ private_za(); ++ else ++ do ++ { ++ inout_za(); ++ private_za(); ++ } ++ while (*ptr); ++} ++ ++/* ++** test20: ++** ... ++** bl a20 ++** (?:(?!x0).)* ++** bl b20 ++** ... ++** mov ([wx][0-9]+), [wx]0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** mov [wx]0, \1 ++** ... ++** bl c20 ++** ... ++*/ ++void test20() __arm_inout("za") ++{ ++ extern int a20() __arm_inout("za"); ++ extern int b20(int); ++ extern void c20(int) __arm_inout("za"); ++ c20(b20(a20())); ++} ++ ++/* ++** test21: ++** ... ++** bl a21 ++** (?:(?!x0).)* ++** bl b21 ++** ... ++** mov (x[0-9]+), x0 ++** ... ++** bl __arm_tpidr2_restore ++** ... ++** mov x0, \1 ++** ... ++** bl c21 ++** ... ++*/ ++void test21() __arm_inout("za") ++{ ++ extern __UINT64_TYPE__ a21() __arm_inout("za"); ++ extern __UINT64_TYPE__ b21(__UINT64_TYPE__); ++ extern void c21(__UINT64_TYPE__) __arm_inout("za"); ++ c21(b21(a21())); ++} ++ ++/* ++** test22: ++** (?:(?!rdsvl).)* ++** rdsvl x[0-9]+, #1 ++** (?:(?!rdsvl).)* ++*/ ++void test22(volatile int *ptr) __arm_inout("za") ++{ ++ inout_za(); ++ if (*ptr) ++ *ptr += 1; ++ else ++ private_za(); ++ private_za(); ++ in_za(); ++} ++ ++void test23(volatile int *ptr) __arm_inout("za") ++{ ++ if (*ptr) ++ *ptr += 1; ++ else ++ inout_za(); ++ inout_za(); ++} ++ ++/* ++** test24: ++** ... ++** bl in_za ++** ... ++** incb x1 ++** ... ++** bl out_za ++** bl inout_za ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** incb x1 ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** incb x1 ++** ... ++** msr tpidr2_el0, x[0-9]+ ++** ... ++** bl private_za ++** ... ++** mrs x[0-9]+, tpidr2_el0 ++** ... ++** ret ++*/ ++void test24() __arm_inout("za") ++{ ++ in_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ out_za(); ++ inout_za(); ++ private_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ private_za(); ++ asm ("incb\tx1" ::: "x1", "za"); ++ in_za(); ++ private_za(); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c +new file mode 100644 +index 000000000..d5b226ae1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c +@@ -0,0 +1,23 @@ ++// { dg-options "-O -fno-optimize-sibling-calls -fomit-frame-pointer" } ++ ++void private_za(); ++void out_za() __arm_out("za"); ++void in_za() __arm_in("za"); ++ ++__arm_new("za") void test20(volatile int *ptr) ++{ ++ if (*ptr) ++ out_za(); ++ else ++ *ptr += 1; ++ *ptr += 1; ++ if (*ptr) ++ in_za(); ++ else ++ *ptr += 1; ++} ++ ++// { dg-final { scan-assembler {\tbl\t__arm_tpidr2_save\n} } } ++// { dg-final { scan-assembler {\tsmstart\tza\n} } } ++// { dg-final { scan-assembler {\tsmstop\tza\n} } } ++// { dg-final { scan-assembler-not {\tsub\tsp, sp, x[0-9]+\n} } } +-- +2.33.0 + |