summaryrefslogtreecommitdiff
path: root/0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch
diff options
context:
space:
mode:
Diffstat (limited to '0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch')
-rw-r--r--0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch194
1 files changed, 194 insertions, 0 deletions
diff --git a/0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch b/0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch
new file mode 100644
index 0000000..da5ad6a
--- /dev/null
+++ b/0114-aarch64-Measure-reg_offset-from-the-bottom-of-the-frame.patch
@@ -0,0 +1,194 @@
+From ae7192e24b8fd6b3f14b44de03bebc6af92c13ed Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 27 Jun 2023 11:36:08 +0100
+Subject: [PATCH] aarch64: Measure reg_offset from the bottom of the frame
+
+reg_offset was measured from the bottom of the saved register area.
+This made perfect sense with the original layout, since the bottom
+of the saved register area was also the hard frame pointer address.
+It became slightly less obvious with SVE, since we save SVE
+registers below the hard frame pointer, but it still made sense.
+
+However, if we want to allow different frame layouts, it's more
+convenient and obvious to measure reg_offset from the bottom of
+the frame. After previous patches, it's also a slight simplification
+in its own right.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame): Add comment above
+ reg_offset.
+ * config/aarch64/aarch64.c (aarch64_layout_frame): Walk offsets
+ from the bottom of the frame, rather than the bottom of the saved
+ register area. Measure reg_offset from the bottom of the frame
+ rather than the bottom of the saved register area.
+ (aarch64_save_callee_saves): Update accordingly.
+ (aarch64_restore_callee_saves): Likewise.
+ (aarch64_get_separate_components): Likewise.
+ (aarch64_process_components): Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 53 ++++++++++++++++--------------------
+ gcc/config/aarch64/aarch64.h | 3 ++
+ 2 files changed, 27 insertions(+), 29 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 81ddb99c165..1ed335f5487 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -6826,7 +6826,6 @@ aarch64_needs_frame_chain (void)
+ static void
+ aarch64_layout_frame (void)
+ {
+- poly_int64 offset = 0;
+ int regno, last_fp_reg = INVALID_REGNUM;
+ machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
+ poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+@@ -6902,7 +6901,9 @@ aarch64_layout_frame (void)
+ gcc_assert (crtl->is_leaf
+ || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+
+- frame.bytes_below_saved_regs = crtl->outgoing_args_size;
++ poly_int64 offset = crtl->outgoing_args_size;
++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++ frame.bytes_below_saved_regs = offset;
+
+ /* Now assign stack slots for the registers. Start with the predicate
+ registers, since predicate LDR and STR have a relatively small
+@@ -6914,7 +6915,8 @@ aarch64_layout_frame (void)
+ offset += BYTES_PER_SVE_PRED;
+ }
+
+- if (maybe_ne (offset, 0))
++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
++ if (maybe_ne (saved_prs_size, 0))
+ {
+ /* If we have any vector registers to save above the predicate registers,
+ the offset of the vector register save slots need to be a multiple
+@@ -6932,10 +6934,10 @@ aarch64_layout_frame (void)
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+ else
+ {
+- if (known_le (offset, vector_save_size))
+- offset = vector_save_size;
+- else if (known_le (offset, vector_save_size * 2))
+- offset = vector_save_size * 2;
++ if (known_le (saved_prs_size, vector_save_size))
++ offset = frame.bytes_below_saved_regs + vector_save_size;
++ else if (known_le (saved_prs_size, vector_save_size * 2))
++ offset = frame.bytes_below_saved_regs + vector_save_size * 2;
+ else
+ gcc_unreachable ();
+ }
+@@ -6952,9 +6954,10 @@ aarch64_layout_frame (void)
+
+ /* OFFSET is now the offset of the hard frame pointer from the bottom
+ of the callee save area. */
+- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
+- frame.below_hard_fp_saved_regs_size = offset;
+- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
++ bool saves_below_hard_fp_p
++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++ frame.bytes_below_hard_fp = offset;
+ if (frame.emit_frame_chain)
+ {
+ /* FP and LR are placed in the linkage record. */
+@@ -7005,9 +7008,10 @@ aarch64_layout_frame (void)
+
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+- frame.saved_regs_size = offset;
++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+
+- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
++ poly_int64 varargs_and_saved_regs_size
++ = frame.saved_regs_size + frame.saved_varargs_size;
+
+ poly_int64 saved_regs_and_above
+ = aligned_upper_bound (varargs_and_saved_regs_size
+@@ -7430,9 +7434,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = (frame.reg_offset[regno]
+- + frame.bytes_below_saved_regs
+- - bytes_below_sp);
++ offset = frame.reg_offset[regno] - bytes_below_sp;
+ rtx base_rtx = stack_pointer_rtx;
+ poly_int64 sp_offset = offset;
+
+@@ -7539,9 +7541,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = (frame.reg_offset[regno]
+- + frame.bytes_below_saved_regs
+- - bytes_below_sp);
++ offset = frame.reg_offset[regno] - bytes_below_sp;
+ rtx base_rtx = stack_pointer_rtx;
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+@@ -7669,14 +7669,12 @@ aarch64_get_separate_components (void)
+ it as a stack probe for -fstack-clash-protection. */
+ if (flag_stack_clash_protection
+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+- && known_eq (offset, 0))
++ && known_eq (offset, frame.bytes_below_saved_regs))
+ continue;
+
+ /* Get the offset relative to the register we'll use. */
+ if (frame_pointer_needed)
+- offset -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset += frame.bytes_below_saved_regs;
++ offset -= frame.bytes_below_hard_fp;
+
+ /* Check that we can access the stack slot of the register with one
+ direct load with no adjustments needed. */
+@@ -7823,9 +7821,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ rtx reg = gen_rtx_REG (mode, regno);
+ poly_int64 offset = frame.reg_offset[regno];
+ if (frame_pointer_needed)
+- offset -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset += frame.bytes_below_saved_regs;
++ offset -= frame.bytes_below_hard_fp;
+
+ rtx addr = plus_constant (Pmode, ptr_reg, offset);
+ rtx mem = gen_frame_mem (mode, addr);
+@@ -7877,9 +7873,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ /* REGNO2 can be saved/restored in a pair with REGNO. */
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ if (frame_pointer_needed)
+- offset2 -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset2 += frame.bytes_below_saved_regs;
++ offset2 -= frame.bytes_below_hard_fp;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+ rtx mem2 = gen_frame_mem (mode, addr2);
+ rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+@@ -7995,7 +7989,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ if (final_adjustment_p
+ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+ {
+- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
++ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
++ - frame.bytes_below_saved_regs);
+ if (known_ge (lr_offset, 0))
+ min_probe_threshold -= lr_offset.to_constant ();
+ else
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 8d8916c7227..1c3c62639cb 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -809,6 +809,9 @@ extern enum aarch64_processor aarch64_tune;
+ #ifdef HAVE_POLY_INT_H
+ struct GTY (()) aarch64_frame
+ {
++ /* The offset from the bottom of the static frame (the bottom of the
++ outgoing arguments) of each register save slot, or -2 if no save is
++ needed. */
+ poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
+
+ /* The number of extra stack bytes taken up by register varargs.
+--
+2.39.3