summaryrefslogtreecommitdiff
path: root/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
blob: 6348a4997b45c0eebb92b69e83392b274d1f050a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 17 Oct 2023 23:45:43 +0100
Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps

Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e

The prologue/epilogue pass allows the prologue sequence to contain
jumps.  The sequence is then partitioned into basic blocks using
find_many_sub_basic_blocks.

This patch treats epilogues in a similar way.  Since only one block
might need to be split, the patch (re)introduces a find_sub_basic_blocks
routine to handle a single block.

The new routine hard-codes the assumption that split_block will chain
the new block immediately after the original block.  The routine doesn't
try to replicate the fix for PR81030, since that was specific to
gimple->rtl expansion.

The patch is needed for follow-on aarch64 patches that add conditional
code to the epilogue.  The tests are part of those patches.

gcc/
	* cfgbuild.h (find_sub_basic_blocks): Declare.
	* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
	split out from...
	(find_many_sub_basic_blocks): ...here.
	(find_sub_basic_blocks): New function.
	* function.cc (thread_prologue_and_epilogue_insns): Handle
	epilogues that contain jumps.
---
 gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
 gcc/cfgbuild.h  |  1 +
 gcc/function.cc |  4 +++
 3 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
index 646a06614..58b865f29 100644
--- a/gcc/cfgbuild.cc
+++ b/gcc/cfgbuild.cc
@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
     }
 }
 
+/* Update the profile information for BB, which was created by splitting
+   an RTL block that had a non-final jump.  */
+
+static void
+update_profile_for_new_sub_basic_block (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+
+  bool initialized_src = false, uninitialized_src = false;
+  bb->count = profile_count::zero ();
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      if (e->count ().initialized_p ())
+	{
+	  bb->count += e->count ();
+	  initialized_src = true;
+	}
+      else
+	uninitialized_src = true;
+    }
+  /* When some edges are missing with a read profile, this is
+     most likely because RTL expansion introduced a loop.
+     When the profile is guessed we may have a BB that is reachable
+     from an unlikely path as well as from a normal path.
+
+     TODO: We should handle loops created during BB expansion
+     correctly here.  For now we assume all those loops cycle
+     precisely once.  */
+  if (!initialized_src
+      || (uninitialized_src
+	   && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+    bb->count = profile_count::uninitialized ();
+
+  compute_outgoing_frequencies (bb);
+}
+
 /* Assume that some pass has inserted labels or control flow
    instructions within a basic block.  Split basic blocks as needed
    and create edges.  */
@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
   if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
     FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
       {
-	edge e;
-	edge_iterator ei;
-
 	if (STATE (bb) == BLOCK_ORIGINAL)
 	  continue;
 	if (STATE (bb) == BLOCK_NEW)
 	  {
-	    bool initialized_src = false, uninitialized_src = false;
-	    bb->count = profile_count::zero ();
-	    FOR_EACH_EDGE (e, ei, bb->preds)
-	      {
-		if (e->count ().initialized_p ())
-		  {
-		    bb->count += e->count ();
-		    initialized_src = true;
-		  }
-		else
-		  uninitialized_src = true;
-	      }
-	    /* When some edges are missing with read profile, this is
-	       most likely because RTL expansion introduced loop.
-	       When profile is guessed we may have BB that is reachable
-	       from unlikely path as well as from normal path.
-
-	       TODO: We should handle loops created during BB expansion
-	       correctly here.  For now we assume all those loop to cycle
-	       precisely once.  */
-	    if (!initialized_src
-		|| (uninitialized_src
-		     && profile_status_for_fn (cfun) < PROFILE_GUESSED))
-	      bb->count = profile_count::uninitialized ();
+	    update_profile_for_new_sub_basic_block (bb);
+	    continue;
 	  }
- 	/* If nothing changed, there is no need to create new BBs.  */
-	else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+	/* If nothing changed, there is no need to create new BBs.  */
+	if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
 	  {
 	    /* In rare occassions RTL expansion might have mistakely assigned
 	       a probabilities different from what is in CFG.  This happens
@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
 	      update_br_prob_note (bb);
 	    continue;
 	  }
-
 	compute_outgoing_frequencies (bb);
       }
 
   FOR_EACH_BB_FN (bb, cfun)
     SET_STATE (bb, 0);
 }
+
+/* Like find_many_sub_basic_blocks, but look only within BB.  */
+
+void
+find_sub_basic_blocks (basic_block bb)
+{
+  basic_block end_bb = bb->next_bb;
+  find_bb_boundaries (bb);
+  if (bb->next_bb == end_bb)
+    return;
+
+  /* Re-scan and wire in all edges.  This expects simple (conditional)
+     jumps at the end of each new basic block.  */
+  make_edges (bb, end_bb->prev_bb, 1);
+
+  /* Update branch probabilities.  Expect only (un)conditional jumps
+     to be created, with only forward edges.  */
+  if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+    {
+      compute_outgoing_frequencies (bb);
+      for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
+	update_profile_for_new_sub_basic_block (bb);
+    }
+}
diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
index 85145da7f..53543bb75 100644
--- a/gcc/cfgbuild.h
+++ b/gcc/cfgbuild.h
@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
 extern bool control_flow_insn_p (const rtx_insn *);
 extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
 extern void find_many_sub_basic_blocks (sbitmap);
+extern void find_sub_basic_blocks (basic_block);
 
 #endif /* GCC_CFGBUILD_H */
diff --git a/gcc/function.cc b/gcc/function.cc
index ddab43ca4..f4fc211a0 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
 		  && returnjump_p (BB_END (e->src)))
 		e->flags &= ~EDGE_FALLTHRU;
 	    }
+
+	  find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
 	}
       else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
 	{
@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
 	  set_insn_locations (seq, epilogue_location);
 
 	  emit_insn_before (seq, insn);
+
+	  find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
 	}
     }
 
-- 
2.33.0