summaryrefslogtreecommitdiff
path: root/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
committerCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
commit73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree8e28b63e478c43c252f18b49836dff7313affe54 /0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
parent49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch')
-rw-r--r--0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch560
1 files changed, 560 insertions, 0 deletions
diff --git a/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch b/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
new file mode 100644
index 0000000..813eba9
--- /dev/null
+++ b/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
@@ -0,0 +1,560 @@
+From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
+From: vchernon <chernonog.vyacheslav@huawei.com>
+Date: Wed, 28 Feb 2024 23:05:12 +0800
+Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enchancements new
+ option: -fifcvt-allow-complicated-cmps: allows ifcvt to deal
+ with complicated cmps like
+
+ cmp reg1 (reg2 + reg3)
+
+ can increase compilation time
+ new param:
+ -param=ifcvt-allow-register-renaming=[0,1,2]
+ 1 : allows ifcvt to rename registers in then and else bb
+ 2 : allows to rename registers in condition and else/then bb
+ can increase compilation time and register pressure
+---
+ gcc/common.opt | 4 +
+ gcc/ifcvt.cc | 291 +++++++++++++++---
+ gcc/params.opt | 4 +
+ .../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++
+ gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++
+ 5 files changed, 311 insertions(+), 50 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index c7c6bc256..aa00fb7b0 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3691,4 +3691,8 @@ fipa-ra
+ Common Var(flag_ipa_ra) Optimization
+ Use caller save register across calls if possible.
+
++fifcvt-allow-complicated-cmps
++Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
++Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
++
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
+index 2c1eba312..584db7b55 100644
+--- a/gcc/ifcvt.cc
++++ b/gcc/ifcvt.cc
+@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
+ }
+
+ /* Don't even try if the comparison operands or the mode of X are weird. */
+- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
++ if (!flag_ifcvt_allow_complicated_cmps
++ && (cond_complex
++ || !SCALAR_INT_MODE_P (GET_MODE (x))))
+ return NULL_RTX;
+
+ return emit_store_flag (x, code, XEXP (cond, 0),
+@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
+ /* Currently support only simple single sets in test_bb. */
+ if (!sset
+ || !noce_operand_ok (SET_DEST (sset))
+- || contains_ccmode_rtx_p (SET_DEST (sset))
++ || (!flag_ifcvt_allow_complicated_cmps
++ && contains_ccmode_rtx_p (SET_DEST (sset)))
+ || !noce_operand_ok (SET_SRC (sset)))
+ return false;
+
+@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
+ in this function. */
+
+ static bool
+-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
++bbs_ok_for_cmove_arith (basic_block bb_a,
++ basic_block bb_b,
++ rtx to_rename,
++ bitmap conflict_regs)
+ {
+ rtx_insn *a_insn;
+ bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
+-
++ bitmap intersections = BITMAP_ALLOC (&reg_obstack);
+ df_ref def;
+ df_ref use;
++ rtx_insn *last_a = last_active_insn (bb_a, FALSE);
+
+ FOR_BB_INSNS (bb_a, a_insn)
+ {
+@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+ rtx sset_a = single_set (a_insn);
+
+ if (!sset_a)
+- {
+- BITMAP_FREE (bba_sets);
+- return false;
+- }
++ goto end_cmove_arith_check_and_fail;
+ /* Record all registers that BB_A sets. */
+ FOR_EACH_INSN_DEF (def, a_insn)
+- if (!(to_rename && DF_REF_REG (def) == to_rename))
++ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
+ bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
+ }
+
++ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
+ rtx_insn *b_insn;
+-
+ FOR_BB_INSNS (bb_b, b_insn)
+ {
+ if (!active_insn_p (b_insn))
+@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+ rtx sset_b = single_set (b_insn);
+
+ if (!sset_b)
+- {
+- BITMAP_FREE (bba_sets);
+- return false;
+- }
++ goto end_cmove_arith_check_and_fail;
+
+ /* Make sure this is a REG and not some instance
+ of ZERO_EXTRACT or SUBREG or other dangerous stuff.
+@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+ if (MEM_P (SET_DEST (sset_b)))
+ gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
+ else if (!REG_P (SET_DEST (sset_b)))
+- {
+- BITMAP_FREE (bba_sets);
+- return false;
+- }
++ goto end_cmove_arith_check_and_fail;
+
+- /* If the insn uses a reg set in BB_A return false. */
++ /* If the insn uses a reg set in BB_A return false
++ or try to collect register list for renaming. */
+ FOR_EACH_INSN_USE (use, b_insn)
+ {
+- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
++ if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
+ {
+- BITMAP_FREE (bba_sets);
+- return false;
++ if (param_ifcvt_allow_register_renaming < 1)
++ goto end_cmove_arith_check_and_fail;
++
++ /* Those regs should be renamed. We can't rename CC reg, but
++ possibly we can provide combined comparison in the future. */
++ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
++ goto end_cmove_arith_check_and_fail;
++ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
+ }
+ }
+-
+ }
+
+ BITMAP_FREE (bba_sets);
++ BITMAP_FREE (intersections);
+ return true;
++
++end_cmove_arith_check_and_fail:
++ BITMAP_FREE (bba_sets);
++ BITMAP_FREE (intersections);
++ return false;
+ }
+
+ /* Emit copies of all the active instructions in BB except the last.
+@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
+ return true;
+ }
+
++/* This function tries to rename regs in the condition expression that
++ intersect with the considered bb. The condition expression will be moved
++ down if the optimization is applied, so it is essential that all
++ intersecting registers are renamed; otherwise the transformation
++ can't be applied. Function returns true if renaming was successful
++ and optimization can proceed further. */
++
++static bool
++noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
++{
++ bool success = true;
++ if (bitmap_empty_p (cond_rename_regs))
++ return true;
++ if (param_ifcvt_allow_register_renaming < 2)
++ return false;
++ df_ref use;
++ rtx_insn *cmp_insn = if_info->cond_earliest;
++ /* A jump instruction as the condition is currently unsupported. */
++ if (JUMP_P (cmp_insn))
++ return false;
++ rtx_insn *before_cmp = PREV_INSN (cmp_insn);
++ start_sequence ();
++ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
++ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
++ FOR_EACH_INSN_USE (use, cmp_insn)
++ {
++ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
++ {
++ rtx use_reg = DF_REF_REG (use);
++ rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
++ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
++ {
++ end_sequence ();
++ return false;
++ }
++ noce_emit_move_insn (tmp, use_reg);
++ }
++ }
++
++ emit_insn (PATTERN (copy_of_cmp));
++ rtx_insn *seq = get_insns ();
++ unshare_all_rtl_in_chain (seq);
++ end_sequence ();
++
++ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
++ delete_insn_and_edges (cmp_insn);
++ rtx_insn *insn;
++ FOR_BB_INSNS (cmp_block, insn)
++ df_insn_rescan (insn);
++
++ if_info->cond = noce_get_condition (if_info->jump,
++ &copy_of_cmp,
++ if_info->then_else_reversed);
++ if_info->cond_earliest = copy_of_cmp;
++ if_info->rev_cond = NULL_RTX;
++
++ return success;
++}
++
++/* This function tries to rename regs that intersect with considered bb.
++ Return true if the renaming was successful and optimization can
++ proceed further, false otherwise. */
++static bool
++noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
++{
++ if (bitmap_empty_p (rename_regs))
++ return true;
++ rtx_insn *insn;
++ rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
++ bool res = true;
++ start_sequence ();
++ FOR_BB_INSNS (test_bb, insn)
++ {
++ if (!active_insn_p (insn))
++ continue;
++ /* Only ssets are supported for now. */
++ rtx sset = single_set (insn);
++ gcc_assert (sset);
++ rtx x = SET_DEST (sset);
++ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
++ continue;
++ /* Do not need to rename dest in the last instruction;
++ it will be renamed anyway. */
++ if (insn == last_insn)
++ continue;
++ machine_mode mode = GET_MODE (x);
++ rtx tmp = gen_reg_rtx (mode);
++ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
++ {
++ gcc_assert (insn != last_insn);
++ /* We can generate additional move for such case,
++ but it will increase register pressure.
++ For now just stop transformation. */
++ rtx result_rtx = SET_DEST (single_set (last_insn));
++ if (REG_P (result_rtx) && (x != result_rtx))
++ {
++ res = false;
++ break;
++ }
++ if (!validate_replace_rtx (x, tmp, insn))
++ gcc_unreachable ();
++ noce_emit_move_insn (tmp,x);
++ }
++ set_used_flags (insn);
++ rtx_insn *rename_candidate;
++ for (rename_candidate = NEXT_INSN (insn);
++ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
++ rename_candidate = NEXT_INSN (rename_candidate))
++ {
++ if (!reg_overlap_mentioned_p (x, rename_candidate))
++ continue;
++
++ int replace_res = TRUE;
++ if (rename_candidate == last_insn)
++ {
++ validate_replace_src_group (x, tmp, rename_candidate);
++ replace_res = apply_change_group ();
++ }
++ else
++ replace_res = validate_replace_rtx (x, tmp, rename_candidate);
++ gcc_assert (replace_res);
++ set_used_flags (rename_candidate);
++ }
++ set_used_flags (x);
++ set_used_flags (tmp);
++ }
++ rtx_insn *seq = get_insns ();
++ unshare_all_rtl_in_chain (seq);
++ end_sequence ();
++ emit_insn_before_setloc (seq, first_active_insn (test_bb),
++ INSN_LOCATION (first_active_insn (test_bb)));
++ FOR_BB_INSNS (test_bb, insn)
++ df_insn_rescan (insn);
++ return res;
++}
++
+ /* Try more complex cases involving conditional_move. */
+
+ static int
+@@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
+ std::swap (then_bb, else_bb);
+ }
+ }
+-
++ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
++ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
+ if (then_bb && else_bb
+- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
+- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
+- return FALSE;
++ && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
++ if_info->orig_x,
++ then_bb_rename_regs)
++ || !bbs_ok_for_cmove_arith (else_bb, then_bb,
++ if_info->orig_x,
++ else_bb_rename_regs)))
++ {
++ BITMAP_FREE (then_bb_rename_regs);
++ BITMAP_FREE (else_bb_rename_regs);
++ return FALSE;
++ }
++ bool prepass_renaming = noce_rename_regs_in_bb (then_bb,
++ then_bb_rename_regs)
++ && noce_rename_regs_in_bb (else_bb,
++ else_bb_rename_regs);
++
++ BITMAP_FREE (then_bb_rename_regs);
++ BITMAP_FREE (else_bb_rename_regs);
++
++ if (!prepass_renaming)
++ return FALSE;
+
+ start_sequence ();
+
+@@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op)
+
+ static bool
+ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
+- unsigned int *cost, bool *simple_p)
++ unsigned int *cost, bool *simple_p,
++ bitmap cond_rename_regs)
+ {
+ if (!test_bb)
+ return false;
+@@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
+ rtx_insn *prev_last_insn = PREV_INSN (last_insn);
+ gcc_assert (prev_last_insn);
+
+- /* For now, disallow setting x multiple times in test_bb. */
+- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn))
++ if (REG_P (x)
++ && reg_set_between_p (x, first_insn, prev_last_insn)
++ && param_ifcvt_allow_register_renaming < 1)
+ return false;
+
+ bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
+@@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
+ rtx_insn *insn;
+ FOR_BB_INSNS (test_bb, insn)
+ {
+- if (insn != last_insn)
+- {
+- if (!active_insn_p (insn))
+- continue;
++ if (insn == last_insn)
++ continue;
++ if (!active_insn_p (insn))
++ continue;
+
+- if (!insn_valid_noce_process_p (insn, cc))
+- goto free_bitmap_and_fail;
++ if (!insn_valid_noce_process_p (insn, cc))
++ goto free_bitmap_and_fail;
+
+- rtx sset = single_set (insn);
+- gcc_assert (sset);
++ rtx sset = single_set (insn);
++ gcc_assert (sset);
+
+- if (contains_mem_rtx_p (SET_SRC (sset))
+- || !REG_P (SET_DEST (sset))
+- || reg_overlap_mentioned_p (SET_DEST (sset), cond))
+- goto free_bitmap_and_fail;
++ if (contains_mem_rtx_p (SET_SRC (sset))
++ || !REG_P (SET_DEST (sset)))
++ goto free_bitmap_and_fail;
+
+- potential_cost += pattern_cost (sset, speed_p);
+- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
++ if (reg_overlap_mentioned_p (SET_DEST (sset), cond))
++ {
++ if (param_ifcvt_allow_register_renaming < 1)
++ goto free_bitmap_and_fail;
++ rtx sset_dest = SET_DEST (sset);
++ if (REG_P (sset_dest)
++ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC))
++ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest));
++ else
++ goto free_bitmap_and_fail;
+ }
++ potential_cost += pattern_cost (sset, speed_p);
++ if (SET_DEST (sset) != SET_DEST (last_set))
++ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
+ }
+
+ /* If any of the intermediate results in test_bb are live after test_bb
+@@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info)
+
+ bool speed_p = optimize_bb_for_speed_p (test_bb);
+ unsigned int then_cost = 0, else_cost = 0;
++ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack);
+ if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
+- &if_info->then_simple))
+- return false;
++ &if_info->then_simple, cond_rename_regs))
++ {
++ BITMAP_FREE (cond_rename_regs);
++ return false;
++ }
+
+ if (else_bb
+ && !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
+- &if_info->else_simple))
+- return false;
++ &if_info->else_simple, cond_rename_regs))
++ {
++ BITMAP_FREE (cond_rename_regs);
++ return false;
++ }
+
++ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
++ {
++ BITMAP_FREE (cond_rename_regs);
++ return false;
++ }
++ BITMAP_FREE (cond_rename_regs);
++ cond = if_info->cond;
+ if (speed_p)
+ if_info->original_cost += average_cost (then_cost, else_cost,
+ find_edge (test_bb, then_bb));
+@@ -5823,12 +6014,13 @@ if_convert (bool after_combine)
+ {
+ basic_block bb;
+ int pass;
+-
+ if (optimize == 1)
+ {
+ df_live_add_problem ();
+ df_live_set_all_dirty ();
+ }
++ free_dominance_info (CDI_DOMINATORS);
++ cleanup_cfg (CLEANUP_EXPENSIVE);
+
+ /* Record whether we are after combine pass. */
+ ifcvt_after_combine = after_combine;
+@@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void)
+ dump_reg_info (dump_file);
+ dump_flow_info (dump_file, dump_flags);
+ }
+- cleanup_cfg (CLEANUP_EXPENSIVE);
+ if_convert (false);
+ if (num_updated_if_blocks)
+ /* Get rid of any dead CC-related instructions. */
+diff --git a/gcc/params.opt b/gcc/params.opt
+index d2196dc68..ba87f820b 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
+ Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
+ Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
+
++-param=ifcvt-allow-register-renaming=
++Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
++Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
++
+ -param=max-sched-extend-regions-iters=
+ Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization
+ The maximum number of iterations through CFG to extend regions.
+diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
+new file mode 100644
+index 000000000..65c4d4140
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
+@@ -0,0 +1,35 @@
++
++extern void abort(void);
++
++__attribute__ ((noinline))
++int foo (int x, int y, int z, int a, int b)
++{
++ if (a < 2) {
++ if (a == 0) {
++ if (x - y < 0)
++ x = x - y + z;
++ else
++ x = x - y;
++ }
++ else {
++ if (x + y >= z)
++ x = x + y - z;
++ else
++ x = x + y;
++ }
++ }
++ return x;
++}
++
++int main(void) {
++ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2
++ abort ();
++ if (foo (50,10,7,0,1) != 40) // x - y = 40
++ abort ();
++ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8
++ abort ();
++ if (foo (5,10,70,1,1) != 15) // x + y = 15
++ abort ();
++ return 0;
++}
++
+diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c
+new file mode 100644
+index 000000000..be9a67b3f
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ifcvt-6.c
+@@ -0,0 +1,27 @@
++/* { dg-do compile { target { aarch64*-*-* } } } */
++/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */
++
++typedef unsigned int uint16_t;
++
++uint16_t
++foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a,
++ uint16_t b, uint16_t c, uint16_t d) {
++ int i = 1;
++ int j = 1;
++ if (a > b) {
++ j = x;
++ if (b > c)
++ i = y;
++ else
++ i = z;
++ }
++ else {
++ j = y;
++ if (c > d)
++ i = z;
++ }
++ return i * j;
++}
++
++/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */
++
+--
+2.33.0
+