diff options
Diffstat (limited to '0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch')
-rw-r--r-- | 0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch b/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch new file mode 100644 index 0000000..c87f3ec --- /dev/null +++ b/0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch @@ -0,0 +1,321 @@ +From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001 +From: "Vladimir N. Makarov" <vmakarov@redhat.com> +Date: Thu, 26 Oct 2023 09:50:40 -0400 +Subject: [PATCH 31/32] Modfify cost calculation for dealing with equivalences + +RISCV target developers reported that pseudos with equivalence used in +a loop can be spilled. Simple changes of heuristics of cost +calculation of pseudos with equivalence or even ignoring equivalences +resulted in numerous testsuite failures on different targets or worse +spec2017 performance. This patch implements more sophisticated cost +calculations of pseudos with equivalences. The patch does not change +RA behaviour for targets still using the old reload pass instead of +LRA. The patch solves the reported problem and improves x86-64 +specint2017 a bit (specfp2017 performance stays the same). The patch +takes into account how the equivalence will be used: will it be +integrated into the user insns or require an input reload insn. It +requires additional pass over insns. To compensate RA slow down, the +patch removes a pass over insns in the reload pass used by IRA before. +This also decouples IRA from reload more and will help to remove the +reload pass in the future if it ever happens. + +gcc/ChangeLog: + + * dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when + LRA is used. + * ira-costs.cc: Include regset.h. + (equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains): + New functions. + (find_costs_and_classes): Call calculate_equiv_gains and redefine + mem_cost of pseudos with equivs when LRA is used. + * var-tracking.cc: Include ira.h and lra.h. + (vt_initialize): Use lra_eliminate_regs when LRA is used. +--- + gcc/dwarf2out.cc | 4 +- + gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++-- + gcc/var-tracking.cc | 14 +++- + 3 files changed, 179 insertions(+), 8 deletions(-) + +diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc +index 0a5c081d8..f0f6f4fd4 100644 +--- a/gcc/dwarf2out.cc ++++ b/gcc/dwarf2out.cc +@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized) + argument pointer and soft frame pointer rtx's. + Use DW_OP_fbreg offset DW_OP_stack_value in this case. */ + if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx) +- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl) ++ && (ira_use_lra_p ++ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX) ++ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl) + { + dw_loc_descr_ref result = NULL; + +diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc +index 642fda529..c79311783 100644 +--- a/gcc/ira-costs.cc ++++ b/gcc/ira-costs.cc +@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see + #include "tm_p.h" + #include "insn-config.h" + #include "regs.h" ++#include "regset.h" + #include "ira.h" + #include "ira-int.h" + #include "addresses.h" +@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node) + process_bb_for_costs (bb); + } + ++/* Check that reg REGNO can be changed by TO in INSN. Return true in case the ++ result insn would be valid one. */ ++static bool ++equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn) ++{ ++ validate_replace_src_group (regno_reg_rtx[regno], to, insn); ++ bool res = verify_changes (0); ++ cancel_changes (0); ++ return res; ++} ++ ++/* Return true if X contains a pseudo with equivalence. In this case also ++ return the pseudo through parameter REG. If the pseudo is a part of subreg, ++ return the subreg through parameter SUBREG. */ ++ ++static bool ++get_equiv_regno (rtx x, int ®no, rtx &subreg) ++{ ++ subreg = NULL_RTX; ++ if (GET_CODE (x) == SUBREG) ++ { ++ subreg = x; ++ x = SUBREG_REG (x); ++ } ++ if (REG_P (x) ++ && (ira_reg_equiv[REGNO (x)].memory != NULL ++ || ira_reg_equiv[REGNO (x)].constant != NULL)) ++ { ++ regno = REGNO (x); ++ return true; ++ } ++ RTX_CODE code = GET_CODE (x); ++ const char *fmt = GET_RTX_FORMAT (code); ++ ++ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) ++ if (fmt[i] == 'e') ++ { ++ if (get_equiv_regno (XEXP (x, i), regno, subreg)) ++ return true; ++ } ++ else if (fmt[i] == 'E') ++ { ++ for (int j = 0; j < XVECLEN (x, i); j++) ++ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg)) ++ return true; ++ } ++ return false; ++} ++ ++/* A pass through the current function insns. Calculate costs of using ++ equivalences for pseudos and store them in regno_equiv_gains. */ ++ ++static void ++calculate_equiv_gains (void) ++{ ++ basic_block bb; ++ int regno, freq, cost; ++ rtx subreg; ++ rtx_insn *insn; ++ machine_mode mode; ++ enum reg_class rclass; ++ bitmap_head equiv_pseudos; ++ ++ ira_assert (allocno_p); ++ bitmap_initialize (&equiv_pseudos, ®_obstack); ++ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--) ++ if (ira_reg_equiv[regno].init_insns != NULL ++ && (ira_reg_equiv[regno].memory != NULL ++ || (ira_reg_equiv[regno].constant != NULL ++ /* Ignore complicated constants which probably will be placed ++ in memory: */ ++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE ++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR ++ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF))) ++ { ++ rtx_insn_list *x; ++ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ()) ++ { ++ insn = x->insn (); ++ rtx set = single_set (insn); ++ ++ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno]) ++ break; ++ bb = BLOCK_FOR_INSN (insn); ++ ira_curr_regno_allocno_map ++ = ira_bb_nodes[bb->index].parent->regno_allocno_map; ++ mode = PSEUDO_REGNO_MODE (regno); ++ rclass = pref[COST_INDEX (regno)]; ++ ira_init_register_move_cost_if_necessary (mode); ++ if (ira_reg_equiv[regno].memory != NULL) ++ cost = ira_memory_move_cost[mode][rclass][1]; ++ else ++ cost = ira_register_move_cost[mode][rclass][rclass]; ++ freq = REG_FREQ_FROM_BB (bb); ++ regno_equiv_gains[regno] += cost * freq; ++ } ++ if (x != NULL) ++ /* We found complicated equiv or reverse equiv mem=reg. Ignore ++ them. */ ++ regno_equiv_gains[regno] = 0; ++ else ++ bitmap_set_bit (&equiv_pseudos, regno); ++ } ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ freq = REG_FREQ_FROM_BB (bb); ++ ira_curr_regno_allocno_map ++ = ira_bb_nodes[bb->index].parent->regno_allocno_map; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg) ++ || !bitmap_bit_p (&equiv_pseudos, regno)) ++ continue; ++ rtx subst = ira_reg_equiv[regno].memory; ++ ++ if (subst == NULL) ++ subst = ira_reg_equiv[regno].constant; ++ ira_assert (subst != NULL); ++ mode = PSEUDO_REGNO_MODE (regno); ++ ira_init_register_move_cost_if_necessary (mode); ++ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn); ++ ++ rclass = pref[COST_INDEX (regno)]; ++ if (MEM_P (subst) ++ /* If it is a change of constant into double for example, the ++ result constant probably will be placed in memory. */ ++ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg)))) ++ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1); ++ else if (consumed_p) ++ continue; ++ else ++ cost = ira_register_move_cost[mode][rclass][rclass]; ++ regno_equiv_gains[regno] -= cost * freq; ++ } ++ } ++ bitmap_clear (&equiv_pseudos); ++} ++ + /* Find costs of register classes and memory for allocnos or pseudos + and their best costs. Set up preferred, alternative and allocno + classes for pseudos. */ +@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file) + if (pass == 0) + pref = pref_buffer; + ++ if (ira_use_lra_p && allocno_p && pass == 1) ++ /* It is a pass through all insns. So do it once and only for RA (not ++ for insn scheduler) when we already found preferable pseudo register ++ classes on the previous pass. */ ++ calculate_equiv_gains (); ++ + /* Now for each allocno look at how desirable each class is and + find which class is preferred. */ + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) +@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file) + } + if (i >= first_moveable_pseudo && i < last_moveable_pseudo) + i_mem_cost = 0; ++ else if (ira_use_lra_p) ++ { ++ if (equiv_savings > 0) ++ { ++ i_mem_cost = 0; ++ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5) ++ fprintf (ira_dump_file, ++ " Use MEM for r%d as the equiv savings is %d\n", ++ i, equiv_savings); ++ } ++ } + else if (equiv_savings < 0) + i_mem_cost = -equiv_savings; + else if (equiv_savings > 0) +@@ -2378,7 +2535,10 @@ ira_costs (void) + total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size + * ira_allocnos_num); + initiate_regno_cost_classes (); +- calculate_elim_costs_all_insns (); ++ if (!ira_use_lra_p) ++ /* Process equivs in reload to update costs through hook ++ ira_adjust_equiv_reg_cost. */ ++ calculate_elim_costs_all_insns (); + find_costs_and_classes (ira_dump_file); + setup_allocno_class_and_costs (); + finish_regno_cost_classes (); +@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void) + } + } + +-/* Add COST to the estimated gain for eliminating REGNO with its +- equivalence. If COST is zero, record that no such elimination is +- possible. */ ++/* A hook from the reload pass. Add COST to the estimated gain for eliminating ++ REGNO with its equivalence. If COST is zero, record that no such ++ elimination is possible. */ + + void + ira_adjust_equiv_reg_cost (unsigned regno, int cost) + { ++ ira_assert (!ira_use_lra_p); + if (cost == 0) + regno_equiv_gains[regno] = 0; + else +diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc +index 7c3ad0a55..b10c8c1eb 100644 +--- a/gcc/var-tracking.cc ++++ b/gcc/var-tracking.cc +@@ -107,6 +107,8 @@ + #include "cfgrtl.h" + #include "cfganal.h" + #include "reload.h" ++#include "ira.h" ++#include "lra.h" + #include "calls.h" + #include "tree-dfa.h" + #include "tree-ssa.h" +@@ -10133,7 +10135,9 @@ vt_initialize (void) + #else + reg = arg_pointer_rtx; + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +@@ -10153,7 +10157,9 @@ vt_initialize (void) + reg = arg_pointer_rtx; + fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl); + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +@@ -10185,7 +10191,9 @@ vt_initialize (void) + #else + reg = arg_pointer_rtx; + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +-- +2.28.0.windows.1 + |