summaryrefslogtreecommitdiff
path: root/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
diff options
context:
space:
mode:
Diffstat (limited to '0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch')
-rw-r--r--0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch381
1 files changed, 381 insertions, 0 deletions
diff --git a/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch b/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
new file mode 100644
index 0000000..257cdb3
--- /dev/null
+++ b/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
@@ -0,0 +1,381 @@
+From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:04 +0100
+Subject: [PATCH 083/157] [Backport][SME] aarch64: Use local frame vars in
+ shrink-wrapping code
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd
+
+aarch64_layout_frame uses a shorthand for referring to
+cfun->machine->frame:
+
+ aarch64_frame &frame = cfun->machine->frame;
+
+This patch does the same for some other heavy users of the structure.
+No functional change intended.
+
+gcc/
+ * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
+ a local shorthand for cfun->machine->frame.
+ (aarch64_restore_callee_saves, aarch64_get_separate_components):
+ (aarch64_process_components): Likewise.
+ (aarch64_allocate_and_probe_stack_space): Likewise.
+ (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
+ (aarch64_layout_frame): Use existing shorthand for one more case.
+---
+ gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++----------------
+ 1 file changed, 64 insertions(+), 59 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 08a98f8ba..b7da1d0be 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void)
+ frame.is_scs_enabled
+ = (!crtl->calls_eh_return
+ && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
+- && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
++ && known_ge (frame.reg_offset[LR_REGNUM], 0));
+
+ /* When shadow call stack is enabled, the scs_pop in the epilogue will
+ restore x30, and we don't need to pop x30 again in the traditional
+@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ unsigned start, unsigned limit, bool skip_wb,
+ bool hard_fp_valid_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ rtx_insn *insn;
+ unsigned regno;
+ unsigned regno2;
+@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
+
+ if (skip_wb
+- && (regno == cfun->machine->frame.wb_push_candidate1
+- || regno == cfun->machine->frame.wb_push_candidate2))
++ && (regno == frame.wb_push_candidate1
++ || regno == frame.wb_push_candidate2))
+ continue;
+
+ if (cfun->machine->reg_is_wrapped_separately[regno])
+@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + cfun->machine->frame.reg_offset[regno];
++ offset = start_offset + frame.reg_offset[regno];
+ rtx base_rtx = stack_pointer_rtx;
+ poly_int64 sp_offset = offset;
+
+@@ -9399,7 +9400,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ {
+ gcc_assert (known_eq (start_offset, 0));
+ poly_int64 fp_offset
+- = cfun->machine->frame.below_hard_fp_saved_regs_size;
++ = frame.below_hard_fp_saved_regs_size;
+ if (hard_fp_valid_p)
+ base_rtx = hard_frame_pointer_rtx;
+ else
+@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
+ && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && known_eq (GET_MODE_SIZE (mode),
+- cfun->machine->frame.reg_offset[regno2]
+- - cfun->machine->frame.reg_offset[regno]))
++ frame.reg_offset[regno2] - frame.reg_offset[regno]))
+ {
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ rtx mem2;
+@@ -9472,6 +9472,7 @@ static void
+ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+ unsigned limit, bool skip_wb, rtx *cfi_ops)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ unsigned regno;
+ unsigned regno2;
+ poly_int64 offset;
+@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+ rtx reg, mem;
+
+ if (skip_wb
+- && (regno == cfun->machine->frame.wb_pop_candidate1
+- || regno == cfun->machine->frame.wb_pop_candidate2))
++ && (regno == frame.wb_pop_candidate1
++ || regno == frame.wb_pop_candidate2))
+ continue;
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + cfun->machine->frame.reg_offset[regno];
++ offset = start_offset + frame.reg_offset[regno];
+ rtx base_rtx = stack_pointer_rtx;
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
+ && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && known_eq (GET_MODE_SIZE (mode),
+- cfun->machine->frame.reg_offset[regno2]
+- - cfun->machine->frame.reg_offset[regno]))
++ frame.reg_offset[regno2] - frame.reg_offset[regno]))
+ {
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ rtx mem2;
+@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
+ static sbitmap
+ aarch64_get_separate_components (void)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
+ bitmap_clear (components);
+
+@@ -9627,18 +9628,18 @@ aarch64_get_separate_components (void)
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ continue;
+
+- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++ poly_int64 offset = frame.reg_offset[regno];
+
+ /* If the register is saved in the first SVE save slot, we use
+ it as a stack probe for -fstack-clash-protection. */
+ if (flag_stack_clash_protection
+- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
++ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+ && known_eq (offset, 0))
+ continue;
+
+ /* Get the offset relative to the register we'll use. */
+ if (frame_pointer_needed)
+- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset += crtl->outgoing_args_size;
+
+@@ -9657,11 +9658,11 @@ aarch64_get_separate_components (void)
+ /* If the spare predicate register used by big-endian SVE code
+ is call-preserved, it must be saved in the main prologue
+ before any saves that use it. */
+- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
+- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
++ if (frame.spare_pred_reg != INVALID_REGNUM)
++ bitmap_clear_bit (components, frame.spare_pred_reg);
+
+- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
+- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
++ unsigned reg1 = frame.wb_push_candidate1;
++ unsigned reg2 = frame.wb_push_candidate2;
+ /* If registers have been chosen to be stored/restored with
+ writeback don't interfere with them to avoid having to output explicit
+ stack adjustment instructions. */
+@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
+ static void
+ aarch64_process_components (sbitmap components, bool prologue_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
+ ? HARD_FRAME_POINTER_REGNUM
+ : STACK_POINTER_REGNUM);
+@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ machine_mode mode = aarch64_reg_save_mode (regno);
+
+ rtx reg = gen_rtx_REG (mode, regno);
+- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++ poly_int64 offset = frame.reg_offset[regno];
+ if (frame_pointer_needed)
+- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset += crtl->outgoing_args_size;
+
+@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ break;
+ }
+
+- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
++ poly_int64 offset2 = frame.reg_offset[regno2];
+ /* The next register is not of the same class or its offset is not
+ mergeable with the current one into a pair. */
+ if (aarch64_sve_mode_p (mode)
+ || !satisfies_constraint_Ump (mem)
+ || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
+ || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
+- || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
++ || maybe_ne ((offset2 - frame.reg_offset[regno]),
+ GET_MODE_SIZE (mode)))
+ {
+ insn = emit_insn (set);
+@@ -9840,7 +9842,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ /* REGNO2 can be saved/restored in a pair with REGNO. */
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ if (frame_pointer_needed)
+- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset2 -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset2 += crtl->outgoing_args_size;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+@@ -9935,6 +9937,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ bool frame_related_p,
+ bool final_adjustment_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ HOST_WIDE_INT guard_size
+ = 1 << param_stack_clash_protection_guard_size;
+ HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
+@@ -9955,25 +9958,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ register as a probe. We can't assume that LR was saved at position 0
+ though, so treat any space below it as unprobed. */
+ if (final_adjustment_p
+- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
++ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+ {
+- poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
++ poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
+ if (known_ge (lr_offset, 0))
+ min_probe_threshold -= lr_offset.to_constant ();
+ else
+ gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
+ }
+
+- poly_int64 frame_size = cfun->machine->frame.frame_size;
++ poly_int64 frame_size = frame.frame_size;
+
+ /* We should always have a positive probe threshold. */
+ gcc_assert (min_probe_threshold > 0);
+
+ if (flag_stack_clash_protection && !final_adjustment_p)
+ {
+- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
+- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
+- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
++ poly_int64 initial_adjust = frame.initial_adjust;
++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
++ poly_int64 final_adjust = frame.final_adjust;
+
+ if (known_eq (frame_size, 0))
+ {
+@@ -10262,17 +10265,18 @@ aarch64_epilogue_uses (int regno)
+ void
+ aarch64_expand_prologue (void)
+ {
+- poly_int64 frame_size = cfun->machine->frame.frame_size;
+- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
+- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
+- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
+- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
++ aarch64_frame &frame = cfun->machine->frame;
++ poly_int64 frame_size = frame.frame_size;
++ poly_int64 initial_adjust = frame.initial_adjust;
++ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
++ poly_int64 final_adjust = frame.final_adjust;
++ poly_int64 callee_offset = frame.callee_offset;
++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
+ poly_int64 below_hard_fp_saved_regs_size
+- = cfun->machine->frame.below_hard_fp_saved_regs_size;
+- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
+- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
+- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
++ = frame.below_hard_fp_saved_regs_size;
++ unsigned reg1 = frame.wb_push_candidate1;
++ unsigned reg2 = frame.wb_push_candidate2;
++ bool emit_frame_chain = frame.emit_frame_chain;
+ rtx_insn *insn;
+
+ if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
+@@ -10303,7 +10307,7 @@ aarch64_expand_prologue (void)
+ }
+
+ /* Push return address to shadow call stack. */
+- if (cfun->machine->frame.is_scs_enabled)
++ if (frame.is_scs_enabled)
+ emit_insn (gen_scs_push ());
+
+ if (flag_stack_usage_info)
+@@ -10342,7 +10346,7 @@ aarch64_expand_prologue (void)
+
+ /* The offset of the frame chain record (if any) from the current SP. */
+ poly_int64 chain_offset = (initial_adjust + callee_adjust
+- - cfun->machine->frame.hard_fp_offset);
++ - frame.hard_fp_offset);
+ gcc_assert (known_ge (chain_offset, 0));
+
+ /* The offset of the bottom of the save area from the current SP. */
+@@ -10445,16 +10449,17 @@ aarch64_use_return_insn_p (void)
+ void
+ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ {
+- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
+- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
+- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
+- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
+- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
++ aarch64_frame &frame = cfun->machine->frame;
++ poly_int64 initial_adjust = frame.initial_adjust;
++ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
++ poly_int64 final_adjust = frame.final_adjust;
++ poly_int64 callee_offset = frame.callee_offset;
++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
+ poly_int64 below_hard_fp_saved_regs_size
+- = cfun->machine->frame.below_hard_fp_saved_regs_size;
+- unsigned reg1 = cfun->machine->frame.wb_pop_candidate1;
+- unsigned reg2 = cfun->machine->frame.wb_pop_candidate2;
+- unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled
++ = frame.below_hard_fp_saved_regs_size;
++ unsigned reg1 = frame.wb_pop_candidate1;
++ unsigned reg2 = frame.wb_pop_candidate2;
++ unsigned int last_gpr = (frame.is_scs_enabled
+ ? R29_REGNUM : R30_REGNUM);
+ rtx cfi_ops = NULL;
+ rtx_insn *insn;
+@@ -10488,7 +10493,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ /* We need to add memory barrier to prevent read from deallocated stack. */
+ bool need_barrier_p
+ = maybe_ne (get_frame_size ()
+- + cfun->machine->frame.saved_varargs_size, 0);
++ + frame.saved_varargs_size, 0);
+
+ /* Emit a barrier to prevent loads from a deallocated stack. */
+ if (maybe_gt (final_adjust, crtl->outgoing_args_size)
+@@ -10569,7 +10574,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ }
+
+ /* Pop return address from shadow call stack. */
+- if (cfun->machine->frame.is_scs_enabled)
++ if (frame.is_scs_enabled)
+ {
+ machine_mode mode = aarch64_reg_save_mode (R30_REGNUM);
+ rtx reg = gen_rtx_REG (mode, R30_REGNUM);
+@@ -13023,24 +13028,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+ poly_int64
+ aarch64_initial_elimination_offset (unsigned from, unsigned to)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
++
+ if (to == HARD_FRAME_POINTER_REGNUM)
+ {
+ if (from == ARG_POINTER_REGNUM)
+- return cfun->machine->frame.hard_fp_offset;
++ return frame.hard_fp_offset;
+
+ if (from == FRAME_POINTER_REGNUM)
+- return cfun->machine->frame.hard_fp_offset
+- - cfun->machine->frame.locals_offset;
++ return frame.hard_fp_offset - frame.locals_offset;
+ }
+
+ if (to == STACK_POINTER_REGNUM)
+ {
+ if (from == FRAME_POINTER_REGNUM)
+- return cfun->machine->frame.frame_size
+- - cfun->machine->frame.locals_offset;
++ return frame.frame_size - frame.locals_offset;
+ }
+
+- return cfun->machine->frame.frame_size;
++ return frame.frame_size;
+ }
+
+
+--
+2.33.0
+