diff options
author | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000 |
commit | 73127104a245052cd5cf29cdaaca3e5c32c70348 (patch) | |
tree | 8e28b63e478c43c252f18b49836dff7313affe54 /0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch | |
parent | 49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff) |
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch')
-rw-r--r-- | 0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch | 560 |
1 files changed, 560 insertions, 0 deletions
diff --git a/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch b/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch new file mode 100644 index 0000000..813eba9 --- /dev/null +++ b/0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch @@ -0,0 +1,560 @@ +From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001 +From: vchernon <chernonog.vyacheslav@huawei.com> +Date: Wed, 28 Feb 2024 23:05:12 +0800 +Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enchancements new + option: -fifcvt-allow-complicated-cmps: allows ifcvt to deal + with complicated cmps like + + cmp reg1 (reg2 + reg3) + + can increase compilation time + new param: + -param=ifcvt-allow-register-renaming=[0,1,2] + 1 : allows ifcvt to rename registers in then and else bb + 2 : allows to rename registers in condition and else/then bb + can increase compilation time and register pressure +--- + gcc/common.opt | 4 + + gcc/ifcvt.cc | 291 +++++++++++++++--- + gcc/params.opt | 4 + + .../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++ + gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++ + 5 files changed, 311 insertions(+), 50 deletions(-) + create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index c7c6bc256..aa00fb7b0 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3691,4 +3691,8 @@ fipa-ra + Common Var(flag_ipa_ra) Optimization + Use caller save register across calls if possible. + ++fifcvt-allow-complicated-cmps ++Common Var(flag_ifcvt_allow_complicated_cmps) Optimization ++Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time). ++ + ; This comment is to ensure we retain the blank line above. +diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc +index 2c1eba312..584db7b55 100644 +--- a/gcc/ifcvt.cc ++++ b/gcc/ifcvt.cc +@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep, + } + + /* Don't even try if the comparison operands or the mode of X are weird. */ +- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x))) ++ if (!flag_ifcvt_allow_complicated_cmps ++ && (cond_complex ++ || !SCALAR_INT_MODE_P (GET_MODE (x)))) + return NULL_RTX; + + return emit_store_flag (x, code, XEXP (cond, 0), +@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) + /* Currently support only simple single sets in test_bb. */ + if (!sset + || !noce_operand_ok (SET_DEST (sset)) +- || contains_ccmode_rtx_p (SET_DEST (sset)) ++ || (!flag_ifcvt_allow_complicated_cmps ++ && contains_ccmode_rtx_p (SET_DEST (sset))) + || !noce_operand_ok (SET_SRC (sset))) + return false; + +@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) + in this function. */ + + static bool +-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) ++bbs_ok_for_cmove_arith (basic_block bb_a, ++ basic_block bb_b, ++ rtx to_rename, ++ bitmap conflict_regs) + { + rtx_insn *a_insn; + bitmap bba_sets = BITMAP_ALLOC (®_obstack); +- ++ bitmap intersections = BITMAP_ALLOC (®_obstack); + df_ref def; + df_ref use; ++ rtx_insn *last_a = last_active_insn (bb_a, FALSE); + + FOR_BB_INSNS (bb_a, a_insn) + { +@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) + rtx sset_a = single_set (a_insn); + + if (!sset_a) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; + /* Record all registers that BB_A sets. */ + FOR_EACH_INSN_DEF (def, a_insn) +- if (!(to_rename && DF_REF_REG (def) == to_rename)) ++ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a)) + bitmap_set_bit (bba_sets, DF_REF_REGNO (def)); + } + ++ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets); + rtx_insn *b_insn; +- + FOR_BB_INSNS (bb_b, b_insn) + { + if (!active_insn_p (b_insn)) +@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) + rtx sset_b = single_set (b_insn); + + if (!sset_b) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; + + /* Make sure this is a REG and not some instance + of ZERO_EXTRACT or SUBREG or other dangerous stuff. +@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) + if (MEM_P (SET_DEST (sset_b))) + gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename)); + else if (!REG_P (SET_DEST (sset_b))) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; + +- /* If the insn uses a reg set in BB_A return false. */ ++ /* If the insn uses a reg set in BB_A return false ++ or try to collect register list for renaming. */ + FOR_EACH_INSN_USE (use, b_insn) + { +- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use))) ++ if (bitmap_bit_p (intersections, DF_REF_REGNO (use))) + { +- BITMAP_FREE (bba_sets); +- return false; ++ if (param_ifcvt_allow_register_renaming < 1) ++ goto end_cmove_arith_check_and_fail; ++ ++ /* Those regs should be renamed. We can't rename CC reg, but ++ possibly we can provide combined comparison in the future. */ ++ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC) ++ goto end_cmove_arith_check_and_fail; ++ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use)); + } + } +- + } + + BITMAP_FREE (bba_sets); ++ BITMAP_FREE (intersections); + return true; ++ ++end_cmove_arith_check_and_fail: ++ BITMAP_FREE (bba_sets); ++ BITMAP_FREE (intersections); ++ return false; + } + + /* Emit copies of all the active instructions in BB except the last. +@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple) + return true; + } + ++/* This function tries to rename regs that intersect with considered bb ++ inside condition expression. Condition expression will be moved down ++ if the optimization will be applied, so it is essential to be sure that ++ all intersected registers will be renamed otherwise transformation ++ can't be applied. Function returns true if renaming was successful ++ and optimization can proceed futher. */ ++ ++static bool ++noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) ++{ ++ bool success = true; ++ if (bitmap_empty_p (cond_rename_regs)) ++ return true; ++ if (param_ifcvt_allow_register_renaming < 2) ++ return false; ++ df_ref use; ++ rtx_insn *cmp_insn = if_info->cond_earliest; ++ /* Jump instruction as a condion currently unsupported. */ ++ if (JUMP_P (cmp_insn)) ++ return false; ++ rtx_insn *before_cmp = PREV_INSN (cmp_insn); ++ start_sequence (); ++ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn)); ++ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn); ++ FOR_EACH_INSN_USE (use, cmp_insn) ++ { ++ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use))) ++ { ++ rtx use_reg = DF_REF_REG (use); ++ rtx tmp = gen_reg_rtx (GET_MODE (use_reg)); ++ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp)) ++ { ++ end_sequence (); ++ return false; ++ } ++ noce_emit_move_insn (tmp, use_reg); ++ } ++ } ++ ++ emit_insn (PATTERN (copy_of_cmp)); ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ ++ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn)); ++ delete_insn_and_edges (cmp_insn); ++ rtx_insn *insn; ++ FOR_BB_INSNS (cmp_block, insn) ++ df_insn_rescan (insn); ++ ++ if_info->cond = noce_get_condition (if_info->jump, ++ ©_of_cmp, ++ if_info->then_else_reversed); ++ if_info->cond_earliest = copy_of_cmp; ++ if_info->rev_cond = NULL_RTX; ++ ++ return success; ++} ++ ++/* This function tries to rename regs that intersect with considered bb. ++ return true if the renaming was successful and optimization can ++ proceed futher, false otherwise. */ ++static bool ++noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) ++{ ++ if (bitmap_empty_p (rename_regs)) ++ return true; ++ rtx_insn *insn; ++ rtx_insn *last_insn = last_active_insn (test_bb, FALSE); ++ bool res = true; ++ start_sequence (); ++ FOR_BB_INSNS (test_bb, insn) ++ { ++ if (!active_insn_p (insn)) ++ continue; ++ /* Only ssets are supported for now. */ ++ rtx sset = single_set (insn); ++ gcc_assert (sset); ++ rtx x = SET_DEST (sset); ++ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x))) ++ continue; ++ /* Do not need to rename dest in the last instruction ++ it will be renamed anyway. */ ++ if (insn == last_insn) ++ continue; ++ machine_mode mode = GET_MODE (x); ++ rtx tmp = gen_reg_rtx (mode); ++ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn)) ++ { ++ gcc_assert (insn != last_insn); ++ /* We can generate additional move for such case, ++ but it will increase register preasure. ++ For now just stop transformation. */ ++ rtx result_rtx = SET_DEST (single_set (last_insn)); ++ if (REG_P (result_rtx) && (x != result_rtx)) ++ { ++ res = false; ++ break; ++ } ++ if (!validate_replace_rtx (x, tmp, insn)) ++ gcc_unreachable (); ++ noce_emit_move_insn (tmp,x); ++ } ++ set_used_flags (insn); ++ rtx_insn *rename_candidate; ++ for (rename_candidate = NEXT_INSN (insn); ++ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb)); ++ rename_candidate = NEXT_INSN (rename_candidate)) ++ { ++ if (!reg_overlap_mentioned_p (x, rename_candidate)) ++ continue; ++ ++ int replace_res = TRUE; ++ if (rename_candidate == last_insn) ++ { ++ validate_replace_src_group (x, tmp, rename_candidate); ++ replace_res = apply_change_group (); ++ } ++ else ++ replace_res = validate_replace_rtx (x, tmp, rename_candidate); ++ gcc_assert (replace_res); ++ set_used_flags (rename_candidate); ++ } ++ set_used_flags (x); ++ set_used_flags (tmp); ++ } ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ emit_insn_before_setloc (seq, first_active_insn (test_bb), ++ INSN_LOCATION (first_active_insn (test_bb))); ++ FOR_BB_INSNS (test_bb, insn) ++ df_insn_rescan (insn); ++ return res; ++} ++ + /* Try more complex cases involving conditional_move. */ + + static int +@@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info) + std::swap (then_bb, else_bb); + } + } +- ++ bitmap else_bb_rename_regs = BITMAP_ALLOC (®_obstack); ++ bitmap then_bb_rename_regs = BITMAP_ALLOC (®_obstack); + if (then_bb && else_bb +- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x) +- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x))) +- return FALSE; ++ && (!bbs_ok_for_cmove_arith (then_bb, else_bb, ++ if_info->orig_x, ++ then_bb_rename_regs) ++ || !bbs_ok_for_cmove_arith (else_bb, then_bb, ++ if_info->orig_x, ++ else_bb_rename_regs))) ++ { ++ BITMAP_FREE (then_bb_rename_regs); ++ BITMAP_FREE (else_bb_rename_regs); ++ return FALSE; ++ } ++ bool prepass_renaming = noce_rename_regs_in_bb (then_bb, ++ then_bb_rename_regs) ++ && noce_rename_regs_in_bb (else_bb, ++ else_bb_rename_regs); ++ ++ BITMAP_FREE (then_bb_rename_regs); ++ BITMAP_FREE (else_bb_rename_regs); ++ ++ if (!prepass_renaming) ++ return FALSE; + + start_sequence (); + +@@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op) + + static bool + bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, +- unsigned int *cost, bool *simple_p) ++ unsigned int *cost, bool *simple_p, ++ bitmap cond_rename_regs) + { + if (!test_bb) + return false; +@@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + rtx_insn *prev_last_insn = PREV_INSN (last_insn); + gcc_assert (prev_last_insn); + +- /* For now, disallow setting x multiple times in test_bb. */ +- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn)) ++ if (REG_P (x) ++ && reg_set_between_p (x, first_insn, prev_last_insn) ++ && param_ifcvt_allow_register_renaming < 1) + return false; + + bitmap test_bb_temps = BITMAP_ALLOC (®_obstack); +@@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + rtx_insn *insn; + FOR_BB_INSNS (test_bb, insn) + { +- if (insn != last_insn) +- { +- if (!active_insn_p (insn)) +- continue; ++ if (insn == last_insn) ++ continue; ++ if (!active_insn_p (insn)) ++ continue; + +- if (!insn_valid_noce_process_p (insn, cc)) +- goto free_bitmap_and_fail; ++ if (!insn_valid_noce_process_p (insn, cc)) ++ goto free_bitmap_and_fail; + +- rtx sset = single_set (insn); +- gcc_assert (sset); ++ rtx sset = single_set (insn); ++ gcc_assert (sset); + +- if (contains_mem_rtx_p (SET_SRC (sset)) +- || !REG_P (SET_DEST (sset)) +- || reg_overlap_mentioned_p (SET_DEST (sset), cond)) +- goto free_bitmap_and_fail; ++ if (contains_mem_rtx_p (SET_SRC (sset)) ++ || !REG_P (SET_DEST (sset))) ++ goto free_bitmap_and_fail; + +- potential_cost += pattern_cost (sset, speed_p); +- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); ++ if (reg_overlap_mentioned_p (SET_DEST (sset), cond)) ++ { ++ if (param_ifcvt_allow_register_renaming < 1) ++ goto free_bitmap_and_fail; ++ rtx sset_dest = SET_DEST (sset); ++ if (REG_P (sset_dest) ++ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC)) ++ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest)); ++ else ++ goto free_bitmap_and_fail; + } ++ potential_cost += pattern_cost (sset, speed_p); ++ if (SET_DEST (sset) != SET_DEST (last_set)) ++ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); + } + + /* If any of the intermediate results in test_bb are live after test_bb +@@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info) + + bool speed_p = optimize_bb_for_speed_p (test_bb); + unsigned int then_cost = 0, else_cost = 0; ++ bitmap cond_rename_regs = BITMAP_ALLOC (®_obstack); + if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost, +- &if_info->then_simple)) +- return false; ++ &if_info->then_simple, cond_rename_regs)) ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } + + if (else_bb + && !bb_valid_for_noce_process_p (else_bb, cond, &else_cost, +- &if_info->else_simple)) +- return false; ++ &if_info->else_simple, cond_rename_regs)) ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } + ++ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs)) ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } ++ BITMAP_FREE (cond_rename_regs); ++ cond = if_info->cond; + if (speed_p) + if_info->original_cost += average_cost (then_cost, else_cost, + find_edge (test_bb, then_bb)); +@@ -5823,12 +6014,13 @@ if_convert (bool after_combine) + { + basic_block bb; + int pass; +- + if (optimize == 1) + { + df_live_add_problem (); + df_live_set_all_dirty (); + } ++ free_dominance_info (CDI_DOMINATORS); ++ cleanup_cfg (CLEANUP_EXPENSIVE); + + /* Record whether we are after combine pass. */ + ifcvt_after_combine = after_combine; +@@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void) + dump_reg_info (dump_file); + dump_flow_info (dump_file, dump_flags); + } +- cleanup_cfg (CLEANUP_EXPENSIVE); + if_convert (false); + if (num_updated_if_blocks) + /* Get rid of any dead CC-related instructions. */ +diff --git a/gcc/params.opt b/gcc/params.opt +index d2196dc68..ba87f820b 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if- + Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization + Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable. + ++-param=ifcvt-allow-register-renaming= ++Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization ++Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created. ++ + -param=max-sched-extend-regions-iters= + Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization + The maximum number of iterations through CFG to extend regions. +diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c +new file mode 100644 +index 000000000..65c4d4140 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c +@@ -0,0 +1,35 @@ ++ ++extern void abort(void); ++ ++__attribute__ ((noinline)) ++int foo (int x, int y, int z, int a, int b) ++{ ++ if (a < 2) { ++ if (a == 0) { ++ if (x - y < 0) ++ x = x - y + z; ++ else ++ x = x - y; ++ } ++ else { ++ if (x + y >= z) ++ x = x + y - z; ++ else ++ x = x + y; ++ } ++ } ++ return x; ++} ++ ++int main(void) { ++ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2 ++ abort (); ++ if (foo (50,10,7,0,1) != 40) // x - y = 40 ++ abort (); ++ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8 ++ abort (); ++ if (foo (5,10,70,1,1) != 15) // x + y = 15 ++ abort (); ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c +new file mode 100644 +index 000000000..be9a67b3f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ifcvt-6.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile { target { aarch64*-*-* } } } */ ++/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */ ++ ++typedef unsigned int uint16_t; ++ ++uint16_t ++foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a, ++ uint16_t b, uint16_t c, uint16_t d) { ++ int i = 1; ++ int j = 1; ++ if (a > b) { ++ j = x; ++ if (b > c) ++ i = y; ++ else ++ i = z; ++ } ++ else { ++ j = y; ++ if (c > d) ++ i = z; ++ } ++ return i * j; ++} ++ ++/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */ ++ +-- +2.33.0 + |