summary refs log tree commit diff
path: root/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
diff options
context:
space:
mode:
Diffstat (limited to '0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch')
-rw-r--r-- 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch 201
1 files changed, 201 insertions, 0 deletions
diff --git a/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch b/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
new file mode 100644
index 0000000..6348a49
--- /dev/null
+++ b/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
@@ -0,0 +1,201 @@
+From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:45:43 +0100
+Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
+
+The prologue/epilogue pass allows the prologue sequence to contain
+jumps. The sequence is then partitioned into basic blocks using
+find_many_sub_basic_blocks.
+
+This patch treats epilogues in a similar way. Since only one block
+might need to be split, the patch (re)introduces a find_sub_basic_blocks
+routine to handle a single block.
+
+The new routine hard-codes the assumption that split_block will chain
+the new block immediately after the original block. The routine doesn't
+try to replicate the fix for PR81030, since that was specific to
+gimple->rtl expansion.
+
+The patch is needed for follow-on aarch64 patches that add conditional
+code to the epilogue. The tests are part of those patches.
+
+gcc/
+ * cfgbuild.h (find_sub_basic_blocks): Declare.
+ * cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
+ split out from...
+ (find_many_sub_basic_blocks): ...here.
+ (find_sub_basic_blocks): New function.
+ * function.cc (thread_prologue_and_epilogue_insns): Handle
+ epilogues that contain jumps.
+---
+ gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
+ gcc/cfgbuild.h | 1 +
+ gcc/function.cc | 4 +++
+ 3 files changed, 70 insertions(+), 30 deletions(-)
+
+diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
+index 646a06614..58b865f29 100644
+--- a/gcc/cfgbuild.cc
++++ b/gcc/cfgbuild.cc
+@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
+ }
+ }
+
++/* Update the profile information for BB, which was created by splitting
++ an RTL block that had a non-final jump. */
++
++static void
++update_profile_for_new_sub_basic_block (basic_block bb)
++{
++ edge e;
++ edge_iterator ei;
++
++ bool initialized_src = false, uninitialized_src = false;
++ bb->count = profile_count::zero ();
++ FOR_EACH_EDGE (e, ei, bb->preds)
++ {
++ if (e->count ().initialized_p ())
++ {
++ bb->count += e->count ();
++ initialized_src = true;
++ }
++ else
++ uninitialized_src = true;
++ }
++ /* When some edges are missing with a read profile, this is
++ most likely because RTL expansion introduced a loop.
++ When the profile is guessed we may have a BB that is reachable
++ from an unlikely path as well as from a normal path.
++
++ TODO: We should handle loops created during BB expansion
++ correctly here. For now we assume all those loops cycle
++ precisely once. */
++ if (!initialized_src
++ || (uninitialized_src
++ && profile_status_for_fn (cfun) < PROFILE_GUESSED))
++ bb->count = profile_count::uninitialized ();
++
++ compute_outgoing_frequencies (bb);
++}
++
+ /* Assume that some pass has inserted labels or control flow
+ instructions within a basic block. Split basic blocks as needed
+ and create edges. */
+@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+ FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
+ {
+- edge e;
+- edge_iterator ei;
+-
+ if (STATE (bb) == BLOCK_ORIGINAL)
+ continue;
+ if (STATE (bb) == BLOCK_NEW)
+ {
+- bool initialized_src = false, uninitialized_src = false;
+- bb->count = profile_count::zero ();
+- FOR_EACH_EDGE (e, ei, bb->preds)
+- {
+- if (e->count ().initialized_p ())
+- {
+- bb->count += e->count ();
+- initialized_src = true;
+- }
+- else
+- uninitialized_src = true;
+- }
+- /* When some edges are missing with read profile, this is
+- most likely because RTL expansion introduced loop.
+- When profile is guessed we may have BB that is reachable
+- from unlikely path as well as from normal path.
+-
+- TODO: We should handle loops created during BB expansion
+- correctly here. For now we assume all those loop to cycle
+- precisely once. */
+- if (!initialized_src
+- || (uninitialized_src
+- && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+- bb->count = profile_count::uninitialized ();
++ update_profile_for_new_sub_basic_block (bb);
++ continue;
+ }
+- /* If nothing changed, there is no need to create new BBs. */
+- else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
++ /* If nothing changed, there is no need to create new BBs. */
++ if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+ {
+ /* In rare occassions RTL expansion might have mistakely assigned
+ a probabilities different from what is in CFG. This happens
+@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
+ update_br_prob_note (bb);
+ continue;
+ }
+-
+ compute_outgoing_frequencies (bb);
+ }
+
+ FOR_EACH_BB_FN (bb, cfun)
+ SET_STATE (bb, 0);
+ }
++
++/* Like find_many_sub_basic_blocks, but look only within BB. */
++
++void
++find_sub_basic_blocks (basic_block bb)
++{
++ basic_block end_bb = bb->next_bb;
++ find_bb_boundaries (bb);
++ if (bb->next_bb == end_bb)
++ return;
++
++ /* Re-scan and wire in all edges. This expects simple (conditional)
++ jumps at the end of each new basic block. */
++ make_edges (bb, end_bb->prev_bb, 1);
++
++ /* Update branch probabilities. Expect only (un)conditional jumps
++ to be created with only the forward edges. */
++ if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
++ {
++ compute_outgoing_frequencies (bb);
++ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
++ update_profile_for_new_sub_basic_block (bb);
++ }
++}
+diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
+index 85145da7f..53543bb75 100644
+--- a/gcc/cfgbuild.h
++++ b/gcc/cfgbuild.h
+@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
+ extern bool control_flow_insn_p (const rtx_insn *);
+ extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
+ extern void find_many_sub_basic_blocks (sbitmap);
++extern void find_sub_basic_blocks (basic_block);
+
+ #endif /* GCC_CFGBUILD_H */
+diff --git a/gcc/function.cc b/gcc/function.cc
+index ddab43ca4..f4fc211a0 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
+ && returnjump_p (BB_END (e->src)))
+ e->flags &= ~EDGE_FALLTHRU;
+ }
++
++ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
+ }
+ else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
+ {
+@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
+ set_insn_locations (seq, epilogue_location);
+
+ emit_insn_before (seq, insn);
++
++ find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
+ }
+ }
+
+--
+2.33.0
+