summaryrefslogtreecommitdiff
path: root/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
diff options
context:
space:
mode:
Diffstat (limited to '0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch')
-rw-r--r--0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch198
1 files changed, 198 insertions, 0 deletions
diff --git a/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch b/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
new file mode 100644
index 0000000..c036731
--- /dev/null
+++ b/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
@@ -0,0 +1,198 @@
+From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:09 +0100
+Subject: [PATCH 094/157] [Backport][SME] aarch64: Measure reg_offset from the
+ bottom of the frame
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54
+
+reg_offset was measured from the bottom of the saved register area.
+This made perfect sense with the original layout, since the bottom
+of the saved register area was also the hard frame pointer address.
+It became slightly less obvious with SVE, since we save SVE
+registers below the hard frame pointer, but it still made sense.
+
+However, if we want to allow different frame layouts, it's more
+convenient and obvious to measure reg_offset from the bottom of
+the frame. After previous patches, it's also a slight simplification
+in its own right.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame): Add comment above
+ reg_offset.
+ * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets
+ from the bottom of the frame, rather than the bottom of the saved
+ register area. Measure reg_offset from the bottom of the frame
+ rather than the bottom of the saved register area.
+ (aarch64_save_callee_saves): Update accordingly.
+ (aarch64_restore_callee_saves): Likewise.
+ (aarch64_get_separate_components): Likewise.
+ (aarch64_process_components): Likewise.
+---
+ gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++-------------------
+ gcc/config/aarch64/aarch64.h | 3 ++
+ 2 files changed, 27 insertions(+), 29 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 67199a026..df8a83b04 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void)
+ static void
+ aarch64_layout_frame (void)
+ {
+- poly_int64 offset = 0;
+ int regno, last_fp_reg = INVALID_REGNUM;
+ machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
+ poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void)
+ gcc_assert (crtl->is_leaf
+ || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+
+- frame.bytes_below_saved_regs = crtl->outgoing_args_size;
++ poly_int64 offset = crtl->outgoing_args_size;
++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++ frame.bytes_below_saved_regs = offset;
+
+ /* Now assign stack slots for the registers. Start with the predicate
+ registers, since predicate LDR and STR have a relatively small
+@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void)
+ offset += BYTES_PER_SVE_PRED;
+ }
+
+- if (maybe_ne (offset, 0))
++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
++ if (maybe_ne (saved_prs_size, 0))
+ {
+ /* If we have any vector registers to save above the predicate registers,
+ the offset of the vector register save slots need to be a multiple
+@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void)
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+ else
+ {
+- if (known_le (offset, vector_save_size))
+- offset = vector_save_size;
+- else if (known_le (offset, vector_save_size * 2))
+- offset = vector_save_size * 2;
++ if (known_le (saved_prs_size, vector_save_size))
++ offset = frame.bytes_below_saved_regs + vector_save_size;
++ else if (known_le (saved_prs_size, vector_save_size * 2))
++ offset = frame.bytes_below_saved_regs + vector_save_size * 2;
+ else
+ gcc_unreachable ();
+ }
+@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void)
+
+ /* OFFSET is now the offset of the hard frame pointer from the bottom
+ of the callee save area. */
+- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
+- frame.below_hard_fp_saved_regs_size = offset;
+- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
++ bool saves_below_hard_fp_p
++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++ frame.bytes_below_hard_fp = offset;
+ if (frame.emit_frame_chain)
+ {
+ /* FP and LR are placed in the linkage record. */
+@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void)
+
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+- frame.saved_regs_size = offset;
++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+
+- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
++ poly_int64 varargs_and_saved_regs_size
++ = frame.saved_regs_size + frame.saved_varargs_size;
+
+ poly_int64 saved_regs_and_above
+ = aligned_upper_bound (varargs_and_saved_regs_size
+@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = (frame.reg_offset[regno]
+- + frame.bytes_below_saved_regs
+- - bytes_below_sp);
++ offset = frame.reg_offset[regno] - bytes_below_sp;
+ rtx base_rtx = stack_pointer_rtx;
+ poly_int64 sp_offset = offset;
+
+@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = (frame.reg_offset[regno]
+- + frame.bytes_below_saved_regs
+- - bytes_below_sp);
++ offset = frame.reg_offset[regno] - bytes_below_sp;
+ rtx base_rtx = stack_pointer_rtx;
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void)
+ it as a stack probe for -fstack-clash-protection. */
+ if (flag_stack_clash_protection
+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+- && known_eq (offset, 0))
++ && known_eq (offset, frame.bytes_below_saved_regs))
+ continue;
+
+ /* Get the offset relative to the register we'll use. */
+ if (frame_pointer_needed)
+- offset -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset += frame.bytes_below_saved_regs;
++ offset -= frame.bytes_below_hard_fp;
+
+ /* Check that we can access the stack slot of the register with one
+ direct load with no adjustments needed. */
+@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ rtx reg = gen_rtx_REG (mode, regno);
+ poly_int64 offset = frame.reg_offset[regno];
+ if (frame_pointer_needed)
+- offset -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset += frame.bytes_below_saved_regs;
++ offset -= frame.bytes_below_hard_fp;
+
+ rtx addr = plus_constant (Pmode, ptr_reg, offset);
+ rtx mem = gen_frame_mem (mode, addr);
+@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ /* REGNO2 can be saved/restored in a pair with REGNO. */
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ if (frame_pointer_needed)
+- offset2 -= frame.below_hard_fp_saved_regs_size;
+- else
+- offset2 += frame.bytes_below_saved_regs;
++ offset2 -= frame.bytes_below_hard_fp;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+ rtx mem2 = gen_frame_mem (mode, addr2);
+ rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ if (final_adjustment_p
+ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+ {
+- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
++ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
++ - frame.bytes_below_saved_regs);
+ if (known_ge (lr_offset, 0))
+ min_probe_threshold -= lr_offset.to_constant ();
+ else
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 39abca051..f340237d0 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune;
+ #ifdef HAVE_POLY_INT_H
+ struct GTY (()) aarch64_frame
+ {
++ /* The offset from the bottom of the static frame (the bottom of the
++ outgoing arguments) of each register save slot, or -2 if no save is
++ needed. */
+ poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
+
+ /* The number of extra stack bytes taken up by register varargs.
+--
+2.33.0
+