summaryrefslogtreecommitdiff
path: root/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
diff options
context:
space:
mode:
Diffstat (limited to '0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch')
-rw-r--r--0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch321
1 files changed, 321 insertions, 0 deletions
diff --git a/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch b/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
new file mode 100644
index 0000000..c87f3ec
--- /dev/null
+++ b/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
@@ -0,0 +1,321 @@
+From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
+From: "Vladimir N. Makarov" <vmakarov@redhat.com>
+Date: Thu, 26 Oct 2023 09:50:40 -0400
+Subject: [PATCH 31/32] Modfify cost calculation for dealing with equivalences
+
+RISCV target developers reported that pseudos with equivalence used in
+a loop can be spilled. Simple changes of heuristics of cost
+calculation of pseudos with equivalence or even ignoring equivalences
+resulted in numerous testsuite failures on different targets or worse
+spec2017 performance. This patch implements more sophisticated cost
+calculations of pseudos with equivalences. The patch does not change
+RA behaviour for targets still using the old reload pass instead of
+LRA. The patch solves the reported problem and improves x86-64
+specint2017 a bit (specfp2017 performance stays the same). The patch
+takes into account how the equivalence will be used: will it be
+integrated into the user insns or require an input reload insn. This
+requires an additional pass over insns. To compensate for the RA slowdown,
+the patch removes a pass over insns in the reload pass used by IRA before.
+This also decouples IRA from reload more and will help to remove the
+reload pass in the future if it ever happens.
+
+gcc/ChangeLog:
+
+ * dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
+ LRA is used.
+ * ira-costs.cc: Include regset.h.
+ (equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
+ New functions.
+ (find_costs_and_classes): Call calculate_equiv_gains and redefine
+ mem_cost of pseudos with equivs when LRA is used.
+ * var-tracking.cc: Include ira.h and lra.h.
+ (vt_initialize): Use lra_eliminate_regs when LRA is used.
+---
+ gcc/dwarf2out.cc | 4 +-
+ gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++--
+ gcc/var-tracking.cc | 14 +++-
+ 3 files changed, 179 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
+index 0a5c081d8..f0f6f4fd4 100644
+--- a/gcc/dwarf2out.cc
++++ b/gcc/dwarf2out.cc
+@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
+ argument pointer and soft frame pointer rtx's.
+ Use DW_OP_fbreg offset DW_OP_stack_value in this case. */
+ if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
+- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
++ && (ira_use_lra_p
++ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
++ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
+ {
+ dw_loc_descr_ref result = NULL;
+
+diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
+index 642fda529..c79311783 100644
+--- a/gcc/ira-costs.cc
++++ b/gcc/ira-costs.cc
+@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "tm_p.h"
+ #include "insn-config.h"
+ #include "regs.h"
++#include "regset.h"
+ #include "ira.h"
+ #include "ira-int.h"
+ #include "addresses.h"
+@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
+ process_bb_for_costs (bb);
+ }
+
++/* Check whether reg REGNO can be replaced by TO in INSN. Return true if the
++ resulting insn would be valid. The tentative changes are cancelled. */
++static bool
++equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
++{
++ validate_replace_src_group (regno_reg_rtx[regno], to, insn);
++ bool res = verify_changes (0);
++ cancel_changes (0);
++ return res;
++}
++
++/* Return true if X contains a pseudo with equivalence. In this case also
++ return the pseudo number through parameter REGNO. If the pseudo is inside a
++ subreg, return the subreg through parameter SUBREG, otherwise NULL_RTX. */
++
++static bool
++get_equiv_regno (rtx x, int &regno, rtx &subreg)
++{
++ subreg = NULL_RTX;
++ if (GET_CODE (x) == SUBREG)
++ {
++ subreg = x;
++ x = SUBREG_REG (x);
++ }
++ if (REG_P (x)
++ && (ira_reg_equiv[REGNO (x)].memory != NULL
++ || ira_reg_equiv[REGNO (x)].constant != NULL))
++ {
++ regno = REGNO (x);
++ return true;
++ }
++ RTX_CODE code = GET_CODE (x);
++ const char *fmt = GET_RTX_FORMAT (code);
++
++ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
++ if (fmt[i] == 'e')
++ {
++ if (get_equiv_regno (XEXP (x, i), regno, subreg))
++ return true;
++ }
++ else if (fmt[i] == 'E')
++ {
++ for (int j = 0; j < XVECLEN (x, i); j++)
++ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
++ return true;
++ }
++ return false;
++}
++
++/* A pass through the current function insns. Calculate costs of using
++ equivalences for pseudos and store them in regno_equiv_gains. */
++
++static void
++calculate_equiv_gains (void)
++{
++ basic_block bb;
++ int regno, freq, cost;
++ rtx subreg;
++ rtx_insn *insn;
++ machine_mode mode;
++ enum reg_class rclass;
++ bitmap_head equiv_pseudos;
++
++ ira_assert (allocno_p);
++ bitmap_initialize (&equiv_pseudos, &reg_obstack);
++ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
++ if (ira_reg_equiv[regno].init_insns != NULL
++ && (ira_reg_equiv[regno].memory != NULL
++ || (ira_reg_equiv[regno].constant != NULL
++ /* Ignore complicated constants which probably will be placed
++ in memory: */
++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE
++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR
++ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF)))
++ {
++ rtx_insn_list *x;
++ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
++ {
++ insn = x->insn ();
++ rtx set = single_set (insn);
++
++ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno])
++ break;
++ bb = BLOCK_FOR_INSN (insn);
++ ira_curr_regno_allocno_map
++ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
++ mode = PSEUDO_REGNO_MODE (regno);
++ rclass = pref[COST_INDEX (regno)];
++ ira_init_register_move_cost_if_necessary (mode);
++ if (ira_reg_equiv[regno].memory != NULL)
++ cost = ira_memory_move_cost[mode][rclass][1];
++ else
++ cost = ira_register_move_cost[mode][rclass][rclass];
++ freq = REG_FREQ_FROM_BB (bb);
++ regno_equiv_gains[regno] += cost * freq;
++ }
++ if (x != NULL)
++ /* We found complicated equiv or reverse equiv mem=reg. Ignore
++ them. */
++ regno_equiv_gains[regno] = 0;
++ else
++ bitmap_set_bit (&equiv_pseudos, regno);
++ }
++
++ FOR_EACH_BB_FN (bb, cfun)
++ {
++ freq = REG_FREQ_FROM_BB (bb);
++ ira_curr_regno_allocno_map
++ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
++ FOR_BB_INSNS (bb, insn)
++ {
++ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
++ || !bitmap_bit_p (&equiv_pseudos, regno))
++ continue;
++ rtx subst = ira_reg_equiv[regno].memory;
++
++ if (subst == NULL)
++ subst = ira_reg_equiv[regno].constant;
++ ira_assert (subst != NULL);
++ mode = PSEUDO_REGNO_MODE (regno);
++ ira_init_register_move_cost_if_necessary (mode);
++ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
++
++ rclass = pref[COST_INDEX (regno)];
++ if (MEM_P (subst)
++ /* If it is a change of constant into double for example, the
++ result constant probably will be placed in memory. */
++ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
++ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
++ else if (consumed_p)
++ continue;
++ else
++ cost = ira_register_move_cost[mode][rclass][rclass];
++ regno_equiv_gains[regno] -= cost * freq;
++ }
++ }
++ bitmap_clear (&equiv_pseudos);
++}
++
+ /* Find costs of register classes and memory for allocnos or pseudos
+ and their best costs. Set up preferred, alternative and allocno
+ classes for pseudos. */
+@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file)
+ if (pass == 0)
+ pref = pref_buffer;
+
++ if (ira_use_lra_p && allocno_p && pass == 1)
++ /* It is a pass through all insns. So do it once and only for RA (not
++ for insn scheduler) when we already found preferable pseudo register
++ classes on the previous pass. */
++ calculate_equiv_gains ();
++
+ /* Now for each allocno look at how desirable each class is and
+ find which class is preferred. */
+ for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
+@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file)
+ }
+ if (i >= first_moveable_pseudo && i < last_moveable_pseudo)
+ i_mem_cost = 0;
++ else if (ira_use_lra_p)
++ {
++ if (equiv_savings > 0)
++ {
++ i_mem_cost = 0;
++ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
++ fprintf (ira_dump_file,
++ " Use MEM for r%d as the equiv savings is %d\n",
++ i, equiv_savings);
++ }
++ }
+ else if (equiv_savings < 0)
+ i_mem_cost = -equiv_savings;
+ else if (equiv_savings > 0)
+@@ -2378,7 +2535,10 @@ ira_costs (void)
+ total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
+ * ira_allocnos_num);
+ initiate_regno_cost_classes ();
+- calculate_elim_costs_all_insns ();
++ if (!ira_use_lra_p)
++ /* Process equivs in reload to update costs through hook
++ ira_adjust_equiv_reg_cost. */
++ calculate_elim_costs_all_insns ();
+ find_costs_and_classes (ira_dump_file);
+ setup_allocno_class_and_costs ();
+ finish_regno_cost_classes ();
+@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void)
+ }
+ }
+
+-/* Add COST to the estimated gain for eliminating REGNO with its
+- equivalence. If COST is zero, record that no such elimination is
+- possible. */
++/* A hook from the reload pass. Add COST to the estimated gain for eliminating
++ REGNO with its equivalence. If COST is zero, record that no such
++ elimination is possible. */
+
+ void
+ ira_adjust_equiv_reg_cost (unsigned regno, int cost)
+ {
++ ira_assert (!ira_use_lra_p);
+ if (cost == 0)
+ regno_equiv_gains[regno] = 0;
+ else
+diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc
+index 7c3ad0a55..b10c8c1eb 100644
+--- a/gcc/var-tracking.cc
++++ b/gcc/var-tracking.cc
+@@ -107,6 +107,8 @@
+ #include "cfgrtl.h"
+ #include "cfganal.h"
+ #include "reload.h"
++#include "ira.h"
++#include "lra.h"
+ #include "calls.h"
+ #include "tree-dfa.h"
+ #include "tree-ssa.h"
+@@ -10133,7 +10135,9 @@ vt_initialize (void)
+ #else
+ reg = arg_pointer_rtx;
+ #endif
+- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
++ elim = (ira_use_lra_p
++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
++ : eliminate_regs (reg, VOIDmode, NULL_RTX));
+ if (elim != reg)
+ {
+ if (GET_CODE (elim) == PLUS)
+@@ -10153,7 +10157,9 @@ vt_initialize (void)
+ reg = arg_pointer_rtx;
+ fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl);
+ #endif
+- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
++ elim = (ira_use_lra_p
++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
++ : eliminate_regs (reg, VOIDmode, NULL_RTX));
+ if (elim != reg)
+ {
+ if (GET_CODE (elim) == PLUS)
+@@ -10185,7 +10191,9 @@ vt_initialize (void)
+ #else
+ reg = arg_pointer_rtx;
+ #endif
+- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
++ elim = (ira_use_lra_p
++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
++ : eliminate_regs (reg, VOIDmode, NULL_RTX));
+ if (elim != reg)
+ {
+ if (GET_CODE (elim) == PLUS)
+--
+2.28.0.windows.1
+