diff options
Diffstat (limited to '0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch')
-rw-r--r-- | 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch | 107 |
1 file changed, 107 insertions, 0 deletions
diff --git a/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch b/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch new file mode 100644 index 0000000..25a3b36 --- /dev/null +++ b/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch @@ -0,0 +1,107 @@ +From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 17 Oct 2023 23:46:33 +0100 +Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in + more cases + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666 + +Now that the prologue and epilogue code iterates over saved +registers in offset order, we can put the LR save slot first +without compromising LDP/STP formation. + +This isn't worthwhile when shadow call stacks are enabled, since the +first two registers are also push/pop candidates, and LR cannot be +popped when shadow call stacks are enabled. (LR is instead loaded +first and compared against the shadow stack's value.) + +But otherwise, it seems better to put the LR save slot first, +to reduce unnecessary variation with the layout for stack clash +protection. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make + the position of the LR save slot dependent on stack clash + protection unless shadow call stacks are enabled. + +gcc/testsuite/ + * gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19. + * gcc.target/aarch64/test_frame_4.c: Likewise. + * gcc.target/aarch64/test_frame_7.c: Likewise. + * gcc.target/aarch64/test_frame_10.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++-- + gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 4 ++-- + gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 4 ++-- + gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 4 ++-- + 5 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e10c9d763..1c127192d 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void) + allocate_gpr_slot (R29_REGNUM); + allocate_gpr_slot (R30_REGNUM); + } +- else if (flag_stack_clash_protection ++ else if ((flag_stack_clash_protection || !frame.is_scs_enabled) + && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED)) + /* Put the LR save slot first, since it makes a good choice of probe + for stack clash purposes. The idea is that the link register usually +diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c +index c19505082..c54ab2d0c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c ++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c +@@ -14,6 +14,6 @@ + t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10]) + t_frame_run (test10) + +-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */ +-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */ ++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */ ++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c +index 7e5df84cf..0d715314c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c +@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19") + t_frame_run (test2) + + 
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ +-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */ ++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ ++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c +index ed13487a0..b41229c42 100644 +--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c +@@ -13,6 +13,6 @@ + t_frame_pattern (test4, 400, "x19") + t_frame_run (test4) + +-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ +-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */ ++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ ++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c +index 964527949..5702656a5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c +@@ -13,6 +13,6 @@ + t_frame_pattern (test7, 700, "x19") + t_frame_run (test7) + +-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */ +-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */ ++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp]" 1 } } */ ++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */ + +-- +2.33.0 + |