summaryrefslogtreecommitdiff
path: root/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
diff options
context:
space:
mode:
Diffstat (limited to '0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch')
-rw-r--r--0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch236
1 files changed, 236 insertions, 0 deletions
diff --git a/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch b/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
new file mode 100644
index 0000000..f754f97
--- /dev/null
+++ b/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
@@ -0,0 +1,236 @@
+From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:05 +0100
+Subject: [PATCH 086/157] [Backport][SME] aarch64: Add bytes_below_saved_regs
+ to frame info
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976
+
+The frame layout code currently hard-codes the assumption that
+the number of bytes below the saved registers is equal to the
+size of the outgoing arguments. This patch abstracts that
+value into a new field of aarch64_frame.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
+ field.
+ * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it,
+ and use it instead of crtl->outgoing_args_size.
+ (aarch64_get_separate_components): Use bytes_below_saved_regs instead
+ of outgoing_args_size.
+ (aarch64_process_components): Likewise.
+---
+ gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++-----------------
+ gcc/config/aarch64/aarch64.h | 5 +++
+ 2 files changed, 41 insertions(+), 35 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index c59af6b1c..5533dd85b 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void)
+ gcc_assert (crtl->is_leaf
+ || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+
++ frame.bytes_below_saved_regs = crtl->outgoing_args_size;
++
+ /* Now assign stack slots for the registers. Start with the predicate
+ registers, since predicate LDR and STR have a relatively small
+ offset range. These saves happen below the hard frame pointer. */
+@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void)
+
+ poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
+
+- poly_int64 above_outgoing_args
++ poly_int64 saved_regs_and_above
+ = aligned_upper_bound (varargs_and_saved_regs_size
+ + get_frame_size (),
+ STACK_BOUNDARY / BITS_PER_UNIT);
+
+ frame.hard_fp_offset
+- = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
++ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
+
+ /* Both these values are already aligned. */
+- gcc_assert (multiple_p (crtl->outgoing_args_size,
++ gcc_assert (multiple_p (frame.bytes_below_saved_regs,
+ STACK_BOUNDARY / BITS_PER_UNIT));
+- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
++ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
+
+ frame.locals_offset = frame.saved_varargs_size;
+
+@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void)
+ else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
+ max_push_offset = 256;
+
+- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
++ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
+ HOST_WIDE_INT const_saved_regs_size;
+ if (known_eq (frame.saved_regs_size, 0))
+ frame.initial_adjust = frame.frame_size;
+@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void)
+ && const_size < max_push_offset
+ && known_eq (frame.hard_fp_offset, const_size))
+ {
+- /* Simple, small frame with no outgoing arguments:
++ /* Simple, small frame with no data below the saved registers.
+
+ stp reg1, reg2, [sp, -frame_size]!
+ stp reg3, reg4, [sp, 16] */
+ frame.callee_adjust = const_size;
+ }
+- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
++ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
+ && frame.saved_regs_size.is_constant (&const_saved_regs_size)
+- && const_outgoing_args_size + const_saved_regs_size < 512
+- /* We could handle this case even with outgoing args, provided
+- that the number of args left us with valid offsets for all
+- predicate and vector save slots. It's such a rare case that
+- it hardly seems worth the effort though. */
+- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
++ && const_below_saved_regs + const_saved_regs_size < 512
++ /* We could handle this case even with data below the saved
++ registers, provided that that data left us with valid offsets
++ for all predicate and vector save slots. It's such a rare
++ case that it hardly seems worth the effort though. */
++ && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
+ && !(cfun->calls_alloca
+ && frame.hard_fp_offset.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset))
+ {
+- /* Frame with small outgoing arguments:
++ /* Frame with small area below the saved registers:
+
+ sub sp, sp, frame_size
+- stp reg1, reg2, [sp, outgoing_args_size]
+- stp reg3, reg4, [sp, outgoing_args_size + 16] */
++ stp reg1, reg2, [sp, bytes_below_saved_regs]
++ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
+ frame.initial_adjust = frame.frame_size;
+- frame.callee_offset = const_outgoing_args_size;
++ frame.callee_offset = const_below_saved_regs;
+ }
+ else if (saves_below_hard_fp_p
+ && known_eq (frame.saved_regs_size,
+@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void)
+
+ sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
+ save SVE registers relative to SP
+- sub sp, sp, outgoing_args_size */
++ sub sp, sp, bytes_below_saved_regs */
+ frame.initial_adjust = (frame.hard_fp_offset
+ + frame.below_hard_fp_saved_regs_size);
+- frame.final_adjust = crtl->outgoing_args_size;
++ frame.final_adjust = frame.bytes_below_saved_regs;
+ }
+ else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset)
+ {
+- /* Frame with large outgoing arguments or SVE saves, but with
+- a small local area:
++ /* Frame with large area below the saved registers, or with SVE saves,
++ but with a small area above:
+
+ stp reg1, reg2, [sp, -hard_fp_offset]!
+ stp reg3, reg4, [sp, 16]
+ [sub sp, sp, below_hard_fp_saved_regs_size]
+ [save SVE registers relative to SP]
+- sub sp, sp, outgoing_args_size */
++ sub sp, sp, bytes_below_saved_regs */
+ frame.callee_adjust = const_fp_offset;
+ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
+- frame.final_adjust = crtl->outgoing_args_size;
++ frame.final_adjust = frame.bytes_below_saved_regs;
+ }
+ else
+ {
+- /* Frame with large local area and outgoing arguments or SVE saves,
+- using frame pointer:
++ /* General case:
+
+ sub sp, sp, hard_fp_offset
+ stp x29, x30, [sp, 0]
+@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void)
+ stp reg3, reg4, [sp, 16]
+ [sub sp, sp, below_hard_fp_saved_regs_size]
+ [save SVE registers relative to SP]
+- sub sp, sp, outgoing_args_size */
++ sub sp, sp, bytes_below_saved_regs */
+ frame.initial_adjust = frame.hard_fp_offset;
+ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
+- frame.final_adjust = crtl->outgoing_args_size;
++ frame.final_adjust = frame.bytes_below_saved_regs;
+ }
+
+ /* Make sure the individual adjustments add up to the full frame size. */
+@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void)
+ if (frame_pointer_needed)
+ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+- offset += crtl->outgoing_args_size;
++ offset += frame.bytes_below_saved_regs;
+
+ /* Check that we can access the stack slot of the register with one
+ direct load with no adjustments needed. */
+@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ if (frame_pointer_needed)
+ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+- offset += crtl->outgoing_args_size;
++ offset += frame.bytes_below_saved_regs;
+
+ rtx addr = plus_constant (Pmode, ptr_reg, offset);
+ rtx mem = gen_frame_mem (mode, addr);
+@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ if (frame_pointer_needed)
+ offset2 -= frame.below_hard_fp_saved_regs_size;
+ else
+- offset2 += crtl->outgoing_args_size;
++ offset2 += frame.bytes_below_saved_regs;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+ rtx mem2 = gen_frame_mem (mode, addr2);
+ rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
+ registers. If POLY_SIZE is not large enough to require a probe this function
+ will only adjust the stack. When allocating the stack space
+ FRAME_RELATED_P is then used to indicate if the allocation is frame related.
+- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
+- arguments. If we are then we ensure that any allocation larger than the ABI
+- defined buffer needs a probe so that the invariant of having a 1KB buffer is
+- maintained.
++ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
++ the saved registers. If we are then we ensure that any allocation
++ larger than the ABI defined buffer needs a probe so that the
++ invariant of having a 1KB buffer is maintained.
+
+ We emit barriers after each stack adjustment to prevent optimizations from
+ breaking the invariant that we never drop the stack more than a page. This
+@@ -10132,7 +10133,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
+ be probed. This maintains the requirement that each page is probed at
+ least once. For initial probing we probe only if the allocation is
+- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
++ more than GUARD_SIZE - buffer, and below the saved registers we probe
+ if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
+ GUARD_SIZE. This works that for any allocation that is large enough to
+ trigger a probe here, we'll have at least one, and if they're not large
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 8f0ac2cde..9e0ca380e 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -801,6 +801,11 @@ struct GTY (()) aarch64_frame
+ /* The size of the callee-save registers with a slot in REG_OFFSET. */
+ poly_int64 saved_regs_size;
+
++ /* The number of bytes between the bottom of the static frame (the bottom
++ of the outgoing arguments) and the bottom of the register save area.
++ This value is always a multiple of STACK_BOUNDARY. */
++ poly_int64 bytes_below_saved_regs;
++
+ /* The size of the callee-save registers with a slot in REG_OFFSET that
+ are saved below the hard frame pointer. */
+ poly_int64 below_hard_fp_saved_regs_size;
+--
+2.33.0
+