summaryrefslogtreecommitdiff
path: root/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
committerCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
commit73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree8e28b63e478c43c252f18b49836dff7313affe54 /0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
parent49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch')
-rw-r--r--0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch228
1 files changed, 228 insertions, 0 deletions
diff --git a/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch b/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
new file mode 100644
index 0000000..c302467
--- /dev/null
+++ b/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
@@ -0,0 +1,228 @@
+From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:06 +0100
+Subject: [PATCH 089/157] [Backport][SME] aarch64: Tweak
+ aarch64_save/restore_callee_saves
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db
+
+aarch64_save_callee_saves and aarch64_restore_callee_saves took
+a parameter called start_offset that gives the offset of the
+bottom of the saved register area from the current stack pointer.
+However, it's more convenient for later patches if we use the
+bottom of the entire frame as the reference point, rather than
+the bottom of the saved registers.
+
+Doing that removes the need for the callee_offset field.
+Other than that, this is not a win on its own. It only really
+makes sense in combination with the follow-on patches.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete.
+ * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove
+ callee_offset handling.
+ (aarch64_save_callee_saves): Replace the start_offset parameter
+ with a bytes_below_sp parameter.
+ (aarch64_restore_callee_saves): Likewise.
+ (aarch64_expand_prologue): Update accordingly.
+ (aarch64_expand_epilogue): Likewise.
+---
+ gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------
+ gcc/config/aarch64/aarch64.h | 4 ---
+ 2 files changed, 28 insertions(+), 32 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 4d505c6fc..a0a4c7ac3 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void)
+ frame.final_adjust = 0;
+ frame.callee_adjust = 0;
+ frame.sve_callee_adjust = 0;
+- frame.callee_offset = 0;
+
+ frame.wb_pop_candidate1 = frame.wb_push_candidate1;
+ frame.wb_pop_candidate2 = frame.wb_push_candidate2;
+@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void)
+ stp reg1, reg2, [sp, bytes_below_saved_regs]
+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
+ frame.initial_adjust = frame.frame_size;
+- frame.callee_offset = const_below_saved_regs;
+ }
+ else if (saves_below_hard_fp_p
+ && known_eq (frame.saved_regs_size,
+@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
+ }
+
+ /* Emit code to save the callee-saved registers from register number START
+- to LIMIT to the stack at the location starting at offset START_OFFSET,
+- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
+- is true if the hard frame pointer has been set up. */
++ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
++ bytes above the bottom of the static frame. Skip any write-back
++ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
++ frame pointer has been set up. */
+
+ static void
+-aarch64_save_callee_saves (poly_int64 start_offset,
++aarch64_save_callee_saves (poly_int64 bytes_below_sp,
+ unsigned start, unsigned limit, bool skip_wb,
+ bool hard_fp_valid_p)
+ {
+@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + frame.reg_offset[regno];
++ offset = (frame.reg_offset[regno]
++ + frame.bytes_below_saved_regs
++ - bytes_below_sp);
+ rtx base_rtx = stack_pointer_rtx;
+ poly_int64 sp_offset = offset;
+
+@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ else if (GP_REGNUM_P (regno)
+ && (!offset.is_constant (&const_offset) || const_offset >= 512))
+ {
+- gcc_assert (known_eq (start_offset, 0));
+- poly_int64 fp_offset
+- = frame.below_hard_fp_saved_regs_size;
++ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
+ if (hard_fp_valid_p)
+ base_rtx = hard_frame_pointer_rtx;
+ else
+@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ }
+
+ /* Emit code to restore the callee registers from register number START
+- up to and including LIMIT. Restore from the stack offset START_OFFSET,
+- skipping any write-back candidates if SKIP_WB is true. Write the
+- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
++ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
++ bytes above the bottom of the static frame. Skip any write-back
++ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
++ notes into CFI_OPS. */
+
+ static void
+-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
++aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
+ unsigned limit, bool skip_wb, rtx *cfi_ops)
+ {
+ aarch64_frame &frame = cfun->machine->frame;
+@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + frame.reg_offset[regno];
++ offset = (frame.reg_offset[regno]
++ + frame.bytes_below_saved_regs
++ - bytes_below_sp);
+ rtx base_rtx = stack_pointer_rtx;
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void)
+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
+ poly_int64 final_adjust = frame.final_adjust;
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
+- poly_int64 below_hard_fp_saved_regs_size
+- = frame.below_hard_fp_saved_regs_size;
+ unsigned reg1 = frame.wb_push_candidate1;
+ unsigned reg2 = frame.wb_push_candidate2;
+ bool emit_frame_chain = frame.emit_frame_chain;
+@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void)
+ - frame.hard_fp_offset);
+ gcc_assert (known_ge (chain_offset, 0));
+
+- /* The offset of the bottom of the save area from the current SP. */
+- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
++ /* The offset of the current SP from the bottom of the static frame. */
++ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+
+ if (emit_frame_chain)
+ {
+@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void)
+ {
+ reg1 = R29_REGNUM;
+ reg2 = R30_REGNUM;
+- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
++ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
+ false, false);
+ }
+ else
+@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void)
+ aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+ }
+
+- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
++ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
+ callee_adjust != 0 || emit_frame_chain,
+ emit_frame_chain);
+ if (maybe_ne (sve_callee_adjust, 0))
+@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void)
+ aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
+ sve_callee_adjust,
+ !frame_pointer_needed, false);
+- saved_regs_offset += sve_callee_adjust;
++ bytes_below_sp -= sve_callee_adjust;
+ }
+- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
++ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
+ false, emit_frame_chain);
+- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
++ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
+ callee_adjust != 0 || emit_frame_chain,
+ emit_frame_chain);
+
+ /* We may need to probe the final adjustment if it is larger than the guard
+ that is assumed by the called. */
++ gcc_assert (known_eq (bytes_below_sp, final_adjust));
+ aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
+ !frame_pointer_needed, true);
+ }
+@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ poly_int64 initial_adjust = frame.initial_adjust;
+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
+ poly_int64 final_adjust = frame.final_adjust;
+- poly_int64 callee_offset = frame.callee_offset;
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
+ unsigned reg1 = frame.wb_pop_candidate1;
+@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+
+ /* Restore the vector registers before the predicate registers,
+ so that we can use P4 as a temporary for big-endian SVE frames. */
+- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
++ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
+ callee_adjust != 0, &cfi_ops);
+- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
++ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
+ false, &cfi_ops);
+ if (maybe_ne (sve_callee_adjust, 0))
+ aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
+@@ -10545,7 +10545,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ /* When shadow call stack is enabled, the scs_pop in the epilogue will
+ restore x30, we don't need to restore x30 again in the traditional
+ way. */
+- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
++ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
+ R0_REGNUM, last_gpr,
+ callee_adjust != 0, &cfi_ops);
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index dedc5b32f..a1db4f689 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -837,10 +837,6 @@ struct GTY (()) aarch64_frame
+ It is zero when no push is used. */
+ HOST_WIDE_INT callee_adjust;
+
+- /* The offset from SP to the callee-save registers after initial_adjust.
+- It may be non-zero if no push is used (ie. callee_adjust == 0). */
+- poly_int64 callee_offset;
+-
+ /* The size of the stack adjustment before saving or after restoring
+ SVE registers. */
+ poly_int64 sve_callee_adjust;
+--
+2.33.0
+