summaryrefslogtreecommitdiff
path: root/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
diff options
context:
space:
mode:
Diffstat (limited to '0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch')
-rw-r--r--0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch126
1 files changed, 126 insertions, 0 deletions
diff --git a/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch b/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
new file mode 100644
index 0000000..70cd64b
--- /dev/null
+++ b/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
@@ -0,0 +1,126 @@
+From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:11 +0100
+Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final
+ frame allocation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae
+
+Previous patches ensured that the final frame allocation only needs
+a probe when the size is strictly greater than 1KiB. It's therefore
+safe to use the normal 1024 probe offset in all cases.
+
+The main motivation for doing this is to simplify the code and
+remove the number of special cases.
+
+gcc/
+ * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
+ Always probe the residual allocation at offset 1024, asserting
+ that that is in range.
+
+gcc/testsuite/
+ * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
+ to be at offset 1024 rather than offset 0.
+ * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
+ * gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc | 12 ++++--------
+ .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
+ .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++--
+ .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++--
+ 4 files changed, 9 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1ba4c2f89..6d835dc8f 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ are still safe. */
+ if (residual)
+ {
+- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
++ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
++
+ /* If we're doing final adjustments, and we've done any full page
+ allocations then any residual needs to be probed. */
+ if (final_adjustment_p && rounded_size != 0)
+ min_probe_threshold = 0;
+- /* If doing a small final adjustment, we always probe at offset 0.
+- This is done to avoid issues when the final adjustment is smaller
+- than the probing offset. */
+- else if (final_adjustment_p && rounded_size == 0)
+- residual_probe_offset = 0;
+
+ aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
+ if (residual >= min_probe_threshold)
+@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
+ "\n", residual);
+
+- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+- residual_probe_offset));
++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++ guard_used_by_caller));
+ emit_insn (gen_blockage ());
+ }
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+index 0d8a25d73..f0ec13897 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -33,7 +33,7 @@ int test1(int z) {
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #1040
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+index 82447d20f..6383bec5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -9,7 +9,7 @@ void g();
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #4064
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #1040
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+index 73ac3e4e4..562039b5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+@@ -9,7 +9,7 @@ void g();
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #4064
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #1040
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+--
+2.33.0
+