summaryrefslogtreecommitdiff
path: root/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
blob: 25a3b3681e8b3f87279dd7d67d0811ac1906d659 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 17 Oct 2023 23:46:33 +0100
Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in
 more cases

Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666

Now that the prologue and epilogue code iterates over saved
registers in offset order, we can put the LR save slot first
without compromising LDP/STP formation.

This isn't worthwhile when shadow call stacks are enabled, since the
first two registers are also push/pop candidates, and LR cannot be
popped when shadow call stacks are enabled.  (LR is instead loaded
first and compared against the shadow stack's value.)

But otherwise, it seems better to put the LR save slot first,
to reduce unnecessary variation with the layout for stack clash
protection.

gcc/
	* config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make
	the position of the LR save slot dependent on stack clash
	protection unless shadow call stacks are enabled.

gcc/testsuite/
	* gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19.
	* gcc.target/aarch64/test_frame_4.c: Likewise.
	* gcc.target/aarch64/test_frame_7.c: Likewise.
	* gcc.target/aarch64/test_frame_10.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc                    | 2 +-
 gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++--
 gcc/testsuite/gcc.target/aarch64/test_frame_2.c  | 4 ++--
 gcc/testsuite/gcc.target/aarch64/test_frame_4.c  | 4 ++--
 gcc/testsuite/gcc.target/aarch64/test_frame_7.c  | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e10c9d763..1c127192d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void)
       allocate_gpr_slot (R29_REGNUM);
       allocate_gpr_slot (R30_REGNUM);
     }
-  else if (flag_stack_clash_protection
+  else if ((flag_stack_clash_protection || !frame.is_scs_enabled)
 	   && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
     /* Put the LR save slot first, since it makes a good choice of probe
        for stack clash purposes.  The idea is that the link register usually
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
index c19505082..c54ab2d0c 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
@@ -14,6 +14,6 @@
 t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
 t_frame_run (test10)
 
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
index 7e5df84cf..0d715314c 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19")
 t_frame_run (test2)
 
 
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
index ed13487a0..b41229c42 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
@@ -13,6 +13,6 @@
 t_frame_pattern (test4, 400, "x19")
 t_frame_run (test4)
 
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
index 964527949..5702656a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
@@ -13,6 +13,6 @@
 t_frame_pattern (test7, 700, "x19")
 t_frame_run (test7)
 
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp]" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */
 
-- 
2.33.0