diff options
Diffstat (limited to '0042-Add-split-complex-instructions-pass.patch')
-rw-r--r-- | 0042-Add-split-complex-instructions-pass.patch | 1245 |
1 files changed, 1245 insertions, 0 deletions
diff --git a/0042-Add-split-complex-instructions-pass.patch b/0042-Add-split-complex-instructions-pass.patch new file mode 100644 index 0000000..b73affd --- /dev/null +++ b/0042-Add-split-complex-instructions-pass.patch @@ -0,0 +1,1245 @@ +From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001 +From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com> +Date: Mon, 21 Aug 2023 12:35:19 +0300 +Subject: [PATCH 09/18] Add split-complex-instructions pass + + - Add option -fsplit-ldp-stp + - Add functionality to detect and split depended from store LDP instructions. + - Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range + - Add RTL tests + +Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com> +Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com> +Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com> +--- + gcc/common.opt | 5 + + gcc/config/aarch64/aarch64.cc | 42 ++ + gcc/doc/tm.texi | 8 + + gcc/doc/tm.texi.in | 4 + + gcc/params.opt | 3 + + gcc/passes.def | 1 + + gcc/sched-rgn.cc | 704 +++++++++++++++++- + gcc/target.def | 10 + + .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 74 ++ + .../rtl/aarch64/test-ldp-split-rearrange.c | 40 + + .../gcc.dg/rtl/aarch64/test-ldp-split.c | 174 +++++ + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + 13 files changed, 1066 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index a42bee250..c0e3f5687 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1797,6 +1797,11 @@ floop-nest-optimize + Common Var(flag_loop_nest_optimize) Optimization + Enable the loop nest optimizer. 
+ ++fsplit-ldp-stp ++Common Var(flag_split_ldp_stp) Optimization ++Split load/store pair instructions into separate load/store operations ++for better performance. ++ + fstrict-volatile-bitfields + Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization + Force bitfield accesses to match their type width. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 04072ca25..48e2eded0 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void) + + #endif /* #if CHECKING_P */ + ++/* TODO: stop relying on ranges of insn codes and use a full list of the instruction codes instead. */ ++/* Return true if ICODE falls inside one of the load-pair insn code ranges tested below. */ ++bool ++is_aarch64_ldp_insn (int icode) ++{ ++ if ((icode >= CODE_FOR_load_pair_sw_sisi ++ && icode <= CODE_FOR_load_pair_dw_tftf) ++ || (icode >= CODE_FOR_loadwb_pairsi_si ++ && icode <= CODE_FOR_loadwb_pairtf_di) ++ || (icode >= CODE_FOR_load_pairv8qiv8qi ++ && icode <= CODE_FOR_load_pairdfdf) ++ || (icode >= CODE_FOR_load_pairv16qiv16qi ++ && icode <= CODE_FOR_load_pairv8bfv2df) ++ || (icode >= CODE_FOR_load_pair_lanesv8qi ++ && icode <= CODE_FOR_load_pair_lanesdf)) ++ return true; ++ return false; ++} ++/* Return true if ICODE falls inside one of the store-pair insn code ranges tested below. */ ++bool ++is_aarch64_stp_insn (int icode) ++{ ++ if ((icode >= CODE_FOR_store_pair_sw_sisi ++ && icode <= CODE_FOR_store_pair_dw_tftf) ++ || (icode >= CODE_FOR_storewb_pairsi_si ++ && icode <= CODE_FOR_storewb_pairtf_di) ++ || (icode >= CODE_FOR_vec_store_pairv8qiv8qi ++ && icode <= CODE_FOR_vec_store_pairdfdf) ++ || (icode >= CODE_FOR_vec_store_pairv16qiv16qi ++ && icode <= CODE_FOR_vec_store_pairv8bfv2df) ++ || (icode >= CODE_FOR_store_pair_lanesv8qi ++ && icode <= CODE_FOR_store_pair_lanesdf)) ++ return true; ++ return false; ++} ++/* Register the two predicates above as the TARGET_IS_LDP_INSN and TARGET_IS_STP_INSN hooks. */ ++#undef TARGET_IS_LDP_INSN ++#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn ++ ++#undef TARGET_IS_STP_INSN ++#define TARGET_IS_STP_INSN is_aarch64_stp_insn ++ + #undef TARGET_STACK_PROTECT_GUARD + #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard + +diff
--git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index c5006afc0..0c6415a9c 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export + lists. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}) ++Return true if icode is corresponding to any of the LDP instruction types. ++@end deftypefn ++ ++@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}) ++Return true if icode is corresponding to any of the STP instruction types. ++@end deftypefn ++ + @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void) + This target hook returns @code{true} past the point in which new jump + instructions could be created. On machines that require a register for +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index f869ddd5e..6ff60e562 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export + lists. + @end defmac + ++@hook TARGET_IS_LDP_INSN ++ ++@hook TARGET_IS_STP_INSN ++ + @hook TARGET_CANNOT_MODIFY_JUMPS_P + + @hook TARGET_HAVE_CONDITIONAL_EXECUTION +diff --git a/gcc/params.opt b/gcc/params.opt +index 7fcc2398d..6176d4790 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER) + Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization + Target size of compressed pointer, which should be 8, 16 or 32. + ++-param=param-ldp-dependency-search-range= ++Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization ++Range for depended ldp search in split-ldp-stp path. + ; This comment is to ensure we retain the blank line above. 
+diff --git a/gcc/passes.def b/gcc/passes.def +index 941bbadf0..a30e05688 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_reorder_blocks); + NEXT_PASS (pass_leaf_regs); + NEXT_PASS (pass_split_before_sched2); ++ NEXT_PASS (pass_split_complex_instructions); + NEXT_PASS (pass_sched2); + NEXT_PASS (pass_stack_regs); + PUSH_INSERT_PASSES_WITHIN (pass_stack_regs) +diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc +index a0dfdb788..b4df8bdc5 100644 +--- a/gcc/sched-rgn.cc ++++ b/gcc/sched-rgn.cc +@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3. If not see + are actually scheduled. */ + + #include "config.h" ++#define INCLUDE_SET ++#define INCLUDE_VECTOR + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3. If not see + #include "dbgcnt.h" + #include "pretty-print.h" + #include "print-rtl.h" ++#include "cfgrtl.h" + + /* Disable warnings about quoting issues in the pp_xxx calls below + that (intentionally) don't follow GCC diagnostic conventions. */ +@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt) + return new pass_sched_fusion (ctxt); + } + ++namespace { ++ ++/* Def-use analysis helper functions. */ ++/* Return the DF chain attached to INSN's use of REG (matched by register number), or NULL if a SUBREG use is met before that use is found or if a link of the chain fails one of the checks in the loop below. Asserts that INSN does use REG. */ ++static struct df_link * ++get_defs (rtx_insn *insn, rtx reg) ++{ ++ df_ref use; ++ struct df_link *ref_chain, *ref_link; ++ ++ FOR_EACH_INSN_USE (use, insn) ++ { ++ if (GET_CODE (DF_REF_REG (use)) == SUBREG) ++ return NULL; ++ if (REGNO (DF_REF_REG (use)) == REGNO (reg)) ++ break; ++ } ++ ++ gcc_assert (use != NULL); ++ ++ ref_chain = DF_REF_CHAIN (use); ++ ++ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) ++ { ++ /* Problem getting some definition for this instruction.
*/ ++ if (ref_link->ref == NULL) ++ return NULL; ++ if (DF_REF_INSN_INFO (ref_link->ref) == NULL) ++ return NULL; ++ /* As global regs are assumed to be defined at each function call ++ dataflow can report a call_insn as being a definition of REG. ++ But we can't do anything with that in this pass so proceed only ++ if the instruction really sets REG in a way that can be deduced ++ from the RTL structure. */ ++ if (global_regs[REGNO (reg)] ++ && !set_of (reg, DF_REF_INSN (ref_link->ref))) ++ return NULL; ++ } ++ ++ return ref_chain; ++} ++/* Return the DF chain attached to INSN's definition of REG (matched by register number), or NULL if some link of that chain has no reference. Asserts that INSN does define REG. */ ++static struct df_link * ++get_uses (rtx_insn *insn, rtx reg) ++{ ++ df_ref def; ++ struct df_link *ref_chain, *ref_link; ++ ++ FOR_EACH_INSN_DEF (def, insn) ++ if (REGNO (DF_REF_REG (def)) == REGNO (reg)) ++ break; ++ ++ gcc_assert (def != NULL && "Broken def-use analisys chain."); ++ ++ ref_chain = DF_REF_CHAIN (def); ++ ++ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) ++ { ++ /* Problem getting some use for this instruction: give up on the whole chain. */ ++ if (ref_link->ref == NULL) ++ return NULL; ++ } ++ ++ return ref_chain; ++} ++/* Static description of the pass; it finishes with TODO_df_verify and TODO_df_finish. */ ++const pass_data pass_data_split_complex_instructions = { ++ RTL_PASS, /* Type. */ ++ "split_complex_instructions", /* Name. */ ++ OPTGROUP_NONE, /* Optinfo_flags. */ ++ TV_SPLIT_CMP_INS, /* Tv_id. */ ++ 0, /* Properties_required. */ ++ 0, /* Properties_provided. */ ++ 0, /* Properties_destroyed. */ ++ 0, /* Todo_flags_start. */ ++ (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish.
*/ ++}; ++/* RTL pass that collects the LDP and LDP_TI insns found to depend on a store within the search range and then splits each of them. */ ++class pass_split_complex_instructions : public rtl_opt_pass ++{ ++private: /* Insn kinds used by this pass. */ ++ enum complex_instructions_t ++ { ++ UNDEFINED, ++ LDP, ++ LDP_TI, ++ STP, ++ STR ++ }; ++ ++ void split_complex_insn (rtx_insn *insn); ++ void split_ldp_ti (rtx_insn *insn); ++ void split_ldp_with_offset (rtx_insn *ldp_insn); ++ void split_simple_ldp (rtx_insn *ldp_insn); ++ void split_ldp_stp (rtx_insn *insn); ++ complex_instructions_t get_insn_type (rtx_insn *insn); ++ /* Cursors of the scan loops in execute. */ ++ basic_block bb; ++ rtx_insn *insn; ++ std::set<rtx_insn *> dependent_stores_candidates; /* Rebuilt by find_dependent_stores_candidates. */ ++ std::set<rtx_insn *> ldp_to_split_list; /* Filled by execute, then split one by one. */ ++ ++ complex_instructions_t complex_insn_type = UNDEFINED; ++ bool is_store_insn (rtx_insn *insn); ++ bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb); ++ bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb, ++ rtx_insn *search_insn, ++ int search_range ++ = param_ldp_dependency_search_range); ++ bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn); ++ void init_df (); ++ void find_dependent_stores_candidates (rtx_insn *ldp_insn); ++ int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type, ++ int *arith_operation_ptr = NULL); ++ ++public: ++ pass_split_complex_instructions (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_split_complex_instructions, ctxt) ++ { ++ } ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ /* Scan every insn of every basic block, remember the LDP / LDP_TI insns that depend on a store, then split the remembered insns. Always returns 0. */ ++ virtual unsigned int ++ execute (function *) ++ { ++ enum rtx_code ldp_memref_code; ++ init_df (); ++ ldp_to_split_list.clear (); ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ complex_instructions_t insn_type = get_insn_type (insn); ++ /* TODO: Add splitting of STP instructions. */ ++ if (insn_type != LDP && insn_type != LDP_TI) ++ continue; ++ /* TODO: Currently support only ldp_ti and ldp with REG or ++ PLUS/MINUS offset expression.
*/ ++ if (insn_type == LDP_TI) ++ { ++ ldp_memref_code = GET_CODE (XEXP (XEXP (PATTERN (insn), 1), ++ 0)); ++ if (ldp_memref_code != REG && ldp_memref_code != PLUS ++ && ldp_memref_code != MINUS) ++ continue; ++ } ++ if (is_ldp_dependent_on_store (insn, bb)) ++ { ++ ldp_to_split_list.insert (insn); ++ } ++ } ++ } ++ /* Splitting is done only after the scan, on the insns collected above. */ ++ for (std::set<rtx_insn *>::iterator i = ldp_to_split_list.begin (); ++ i != ldp_to_split_list.end (); ++i) ++ split_complex_insn (*i); ++ ++ return 0; ++ } ++}; // class pass_split_complex_instructions ++/* Return true if a store on which LDP_INSN depends is found by searching backwards from LDP_INSN in BB. Rebuilds dependent_stores_candidates first. */ ++bool ++pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn, ++ basic_block bb) ++{ ++ find_dependent_stores_candidates (ldp_insn); ++ return bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn); ++} ++/* Walk backwards from SEARCH_INSN over at most SEARCH_RANGE active insns, looking for an insn accepted by is_store_reg_dependent for LDP_INSN. On reaching the head of SEARCH_BB the search continues, by recursion, in each predecessor block with the remaining range. Return true as soon as such a store is found. */ ++bool ++pass_split_complex_instructions::bfs_for_reg_dependent_store ( ++ rtx_insn *ldp_insn, basic_block search_bb, rtx_insn *search_insn, ++ int search_range) ++{ ++ rtx_insn *current_search_insn = search_insn; ++ ++ for (int i = search_range; i > 0; --i) ++ { ++ if (!current_search_insn) ++ return false; ++ bool checking_result ++ = is_store_reg_dependent (ldp_insn, current_search_insn); ++ if (checking_result) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "LDP to split:\n"); ++ print_rtl_single (dump_file, ldp_insn); ++ fprintf (dump_file, "Found STR:\n"); ++ print_rtl_single (dump_file, current_search_insn); ++ } ++ return true; ++ } ++ if (current_search_insn == BB_HEAD (search_bb)) ++ { ++ /* Search in all parent BBs for the reg_dependent store.
*/ ++ edge_iterator ei; ++ edge e; ++ ++ FOR_EACH_EDGE (e, ei, search_bb->preds) ++ if (e->src->index != 0 ++ && bfs_for_reg_dependent_store (ldp_insn, e->src, ++ BB_END (e->src), i - 1)) ++ return true; ++ return false; ++ } ++ else ++ { /* Insns that are not active do not consume search range: undo the loop decrement for them. */ ++ if (!active_insn_p (current_search_insn)) ++ i++; ++ current_search_insn = PREV_INSN (current_search_insn); ++ } ++ } ++ return false; ++} ++/* Add the UD/DU chain, MIR and live dataflow problems, run the analysis, then defer insn rescans. */ ++void ++pass_split_complex_instructions::init_df () ++{ ++ df_set_flags (DF_RD_PRUNE_DEAD_DEFS); ++ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); ++ df_mir_add_problem (); ++ df_live_add_problem (); ++ df_live_set_all_dirty (); ++ df_analyze (); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++} ++/* Rebuild dependent_stores_candidates for LDP_INSN: for each register LDP_INSN uses, follow its definitions and collect the store insns (per is_store_insn) that use those definitions. Stops at the first use for which get_defs returns NULL. */ ++void ++pass_split_complex_instructions::find_dependent_stores_candidates ( ++ rtx_insn *ldp_insn) ++{ ++ dependent_stores_candidates.clear (); ++ df_ref use; ++ ++ FOR_EACH_INSN_USE (use, ldp_insn) ++ { ++ df_link *defs = get_defs (ldp_insn, DF_REF_REG (use)); ++ if (!defs) ++ return; ++ ++ for (df_link *def = defs; def; def = def->next) ++ { ++ df_link *uses ++ = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref)); ++ if (!uses) ++ continue; ++ ++ for (df_link *use = uses; use; use = use->next) /* This USE shadows the df_ref USE of the outer loop. */ ++ { ++ if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR ++ && is_store_insn (DF_REF_INSN (use->ref))) ++ dependent_stores_candidates.insert (DF_REF_INSN (use->ref)); ++ } ++ } ++ } ++} ++/* Return true if STR_INSN is a store recorded in dependent_stores_candidates whose signed constant offset equals the offset of LDP_INSN or that offset plus 8. */ ++bool ++pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn, ++ rtx_insn *str_insn) ++{ ++ if (!is_store_insn (str_insn) ++ || dependent_stores_candidates.find (str_insn) ++ == dependent_stores_candidates.end ()) ++ return false; ++ ++ int ldp_offset_sign = UNDEFINED; ++ int ldp_offset ++ = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_offset_sign); ++ if (ldp_offset_sign == MINUS) ++ ldp_offset = -ldp_offset; ++ ++ int str_offset_sign = UNDEFINED; ++ int str_offset = get_insn_offset (str_insn, STR, &str_offset_sign); ++ if (str_offset_sign == MINUS) ++ str_offset = -str_offset;
++ /* The store matches when it writes the first or the second slot read by the pair load. NOTE(review): the slot size is hard-coded as 8 — confirm for other access sizes. */ ++ if (str_offset == ldp_offset || str_offset == ldp_offset + 8) ++ return true; ++ ++ return false; ++} ++/* Return true if INSN is a single_set whose destination is a non-BLKmode MEM addressed either by a REG or by a PLUS/MINUS whose second operand is a CONST_INT. A NULL INSN yields false. */ ++bool ++pass_split_complex_instructions::is_store_insn (rtx_insn *insn) ++{ ++ if (!insn) ++ return false; ++ rtx sset_b = single_set (insn); ++ /* TODO: The condition below accepts only store instructions in which ++ the memory address is either a register (base) or a plus/minus ++ operation (base + #imm). It might make sense to add support for other ++ cases (e.g. multiply and shift). */ ++ if (sset_b && MEM_P (SET_DEST (sset_b)) ++ && GET_MODE (XEXP (sset_b, 0)) != BLKmode ++ && (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == REG ++ || (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == PLUS ++ || GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == MINUS) ++ && (GET_CODE (XEXP (XEXP (XEXP (sset_b, 0), 0), 1)) == CONST_INT))) ++ return true; ++ ++ return false; ++} ++/* Return the constant offset of INSN, read according to INSN_TYPE (LDP, LDP_TI or STR), or 0 if there is none. When ARITH_OPERATION_PTR is non-NULL and an offset expression is reached, the rtx code of that expression is stored through it; a two-element LDP does not report it. */ ++int ++pass_split_complex_instructions::get_insn_offset ( ++ rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr) ++{ ++ rtx insn_pat = PATTERN (insn); ++ int returned_offset = 0; ++ ++ rtx offset_expr = NULL; ++ rtx offset_value_expr = NULL; ++ ++ switch (insn_type) ++ { ++ case LDP: ++ { ++ int number_of_sub_insns = XVECLEN (insn_pat, 0); ++ ++ /* Calculate the offset of the first load insn.
*/ ++ rtx first_load_pat = NULL; /* Sub-patterns are only read here, so they are used in place instead of being copied into freshly allocated insns. */ ++ if (number_of_sub_insns == 2) ++ { /* Plain pair: the offset, if any, is in the address of the first load; the arithmetic code is not reported for this form. */ ++ first_load_pat ++ = XVECEXP (insn_pat, 0, 0); ++ arith_operation_ptr = NULL; ++ ++ offset_expr = XEXP (XEXP (first_load_pat, 1), 0); ++ if (GET_CODE (offset_expr) == PLUS ++ || GET_CODE (offset_expr) == MINUS) ++ offset_value_expr ++ = XEXP (XEXP (XEXP (first_load_pat, 1), 0), 1); ++ else ++ offset_expr = NULL; ++ } ++ else if (number_of_sub_insns == 3) ++ { /* Three elements: the offset is taken from the source of element 0. */ ++ rtx offset_sub_pat ++ = XVECEXP (insn_pat, 0, 0); ++ ++ offset_expr = XEXP (offset_sub_pat, 1); ++ offset_value_expr = XEXP (XEXP (offset_sub_pat, 1), 1); ++ } ++ else ++ { ++ gcc_assert (false ++ && "Wrong number of elements in the ldp_insn vector"); ++ } ++ break; ++ } ++ case LDP_TI: ++ { /* The address is operand 0 of the source; anything but PLUS/MINUS means offset 0. */ ++ offset_expr = XEXP (XEXP (insn_pat, 1), 0); ++ if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS) ++ return 0; ++ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 1), 0), 1); ++ break; ++ } ++ case STR: ++ { ++ offset_expr = XEXP (XEXP (insn_pat, 0), 0); ++ /* If memory location is specified by single base register then the ++ offset is zero.
*/ ++ if (GET_CODE (offset_expr) == REG) ++ return 0; ++ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 0), 0), 1); ++ break; ++ } ++ default: ++ { ++ if (dumps_are_enabled && dump_file) ++ { ++ fprintf (dump_file, "Instruction that was tried to split:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ gcc_assert (false && "Unsupported instruction type"); ++ break; ++ } ++ } ++ /* Only a CONST_INT offset is extracted; its value is read with INTVAL, the accessor for CONST_INT. */ ++ if (offset_expr != NULL && offset_value_expr ++ && GET_CODE (offset_value_expr) == CONST_INT) ++ returned_offset = INTVAL (offset_value_expr); ++ /* Report the rtx code of the offset expression to callers that asked for it; it must be PLUS or MINUS. */ ++ if (arith_operation_ptr != NULL) ++ { ++ *arith_operation_ptr = GET_CODE (offset_expr); ++ gcc_assert ((*arith_operation_ptr == MINUS ++ || *arith_operation_ptr == PLUS) ++ && "Unexpected arithmetic operation in the offset expr"); ++ } ++ ++ return returned_offset; ++} ++/* Split the two-element LDP_INSN into two separate insns emitted with emit_insn, one per element of its pattern vector. */ ++void ++pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn) ++{ ++ rtx pat = PATTERN (ldp_insn); ++ ++ rtx_insn *mem_insn_1 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 0))); ++ rtx_insn *mem_insn_2 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 1))); ++ ++ int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1))); ++ int src_regno; ++ ++ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0); ++ ++ if (GET_CODE (srs_reg_insn) == REG) ++ src_regno = REGNO (srs_reg_insn); ++ else ++ src_regno = REGNO (XEXP (srs_reg_insn, 0)); ++ ++ rtx_insn *emited_insn_1, *emited_insn_2; ++ ++ /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first.
*/ ++ if (src_regno == dest_regno) ++ std::swap (mem_insn_1, mem_insn_2); ++ ++ emited_insn_1 = emit_insn (PATTERN (mem_insn_1)); ++ emited_insn_2 = emit_insn (PATTERN (mem_insn_2)); ++ ++ int sub_insn_1_code = recog (PATTERN (mem_insn_1), mem_insn_1, 0); ++ int sub_insn_2_code = recog (PATTERN (mem_insn_2), mem_insn_2, 0); ++ ++ INSN_CODE (emited_insn_1) = sub_insn_1_code; ++ INSN_CODE (emited_insn_2) = sub_insn_2_code; ++} ++ ++void ++pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn) ++{ ++ rtx pat = PATTERN (ldp_insn); ++ bool post_index = true; ++ ++ rtx_insn offset_insn; ++ rtx_insn mem_insn_1; ++ rtx_insn mem_insn_2; ++ ++ int offset_insn_code; ++ int mem_insn_1_code = -1; ++ int mem_insn_2_code = -1; ++ ++ int offset = 0; ++ int arith_operation = UNDEFINED; ++ ++ for (int i = 0; i < 3; i++) ++ { ++ rtx sub_insn = XVECEXP (pat, 0, i); ++ rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn)); ++ int sub_insn_code ++ = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0); ++ ++ /* If sub_insn is offset related. */ ++ if (GET_RTX_CLASS (sub_insn_code) == RTX_UNARY) ++ { ++ offset_insn = *copy_of_sub_insn; ++ offset_insn_code = sub_insn_code; ++ gcc_assert (i == 0 ++ && "Offset related insn must be the first " ++ "element of a parallel insn vector"); ++ ++ offset = get_insn_offset (ldp_insn, LDP, &arith_operation); ++ } ++ else ++ { ++ if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG) ++ { ++ rtx &offset_expr ++ = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1); ++ if (GET_CODE (offset_expr) == CONST_INT) ++ { ++ int local_offset = XINT (offset_expr, 0); ++ offset = (arith_operation == PLUS ? 
offset : -offset); ++ ++ offset_expr = GEN_INT (local_offset + offset); ++ ++ gcc_assert ( ++ (arith_operation == MINUS || arith_operation == PLUS) ++ && "Unexpected arithmetic operation in offset related " ++ "sub_insn"); ++ ++ if (i == 1) ++ post_index = false; ++ } ++ else ++ { ++ post_index = true; ++ } ++ } ++ } ++ if (i == 1) ++ { ++ mem_insn_1 = *copy_of_sub_insn; ++ mem_insn_1_code = sub_insn_code; ++ } ++ if (i == 2) ++ { ++ mem_insn_2 = *copy_of_sub_insn; ++ mem_insn_2_code = sub_insn_code; ++ } ++ } ++ gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1 ++ && "Uninitialized memory insns"); ++ ++ int dest_regno = REGNO (SET_DEST (PATTERN (&mem_insn_1))); ++ int src_regno; ++ ++ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (&mem_insn_1)), 0); ++ ++ if (GET_CODE (srs_reg_insn) == REG) ++ src_regno = REGNO (srs_reg_insn); ++ else ++ src_regno = REGNO (XEXP (srs_reg_insn, 0)); ++ ++ /* Don't split such weird LDP: the first destination register is also the base register. Nothing is emitted in that case. */ ++ if (src_regno == dest_regno) ++ return; ++ ++ rtx_insn *emited_offset_insn; ++ if (!post_index) ++ { ++ emited_offset_insn = emit_insn (PATTERN (&offset_insn)); ++ INSN_CODE (emited_offset_insn) = offset_insn_code; ++ } ++ ++ rtx_insn *emited_insn_1 = emit_insn (PATTERN (&mem_insn_1)); ++ rtx_insn *emited_insn_2 = emit_insn (PATTERN (&mem_insn_2)); ++ ++ ++ INSN_CODE (emited_insn_1) = mem_insn_1_code; ++ INSN_CODE (emited_insn_2) = mem_insn_2_code; ++ ++ if (post_index) ++ { ++ emited_offset_insn = emit_insn (PATTERN (&offset_insn)); ++ INSN_CODE (emited_offset_insn) = offset_insn_code; ++ } ++} ++/* Build the replacement for the PARALLEL insn INSN inside a fresh sequence, choosing the splitter by the number of elements in its pattern vector (2 or 3). */ ++void ++pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn) ++{ ++ rtx_insn *prev_insn = PREV_INSN (insn); ++ int number_of_sub_insns = XVECLEN (PATTERN (insn), 0); ++ ++ start_sequence (); ++ ++ if (number_of_sub_insns == 2) ++ split_simple_ldp (insn); ++ else if (number_of_sub_insns == 3) ++ split_ldp_with_offset (insn); ++ else ++ gcc_assert (false && "Broken complex insn vector"); ++ ++ rtx_insn *seq = get_insns (); ++
unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ ++ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn)); ++ delete_insn_and_edges (insn); ++} ++ ++void ++pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn) ++{ ++ rtx_insn *prev_insn = PREV_INSN (insn); ++ rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn))); ++ rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn))); ++ ++ rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0); ++ rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1); ++ rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1); ++ ++ PUT_MODE (mem_insn_1, DImode); ++ PUT_MODE (mem_insn_2, DImode); ++ ++ int reg_no_1 = REGNO (reg_insn_1); ++ ++ XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1); ++ XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1); ++ ++ rtx load_insn_2_plus_expr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0); ++ if (GET_CODE (load_insn_2_plus_expr) == REG) ++ { ++ XEXP (XEXP (PATTERN (load_insn_2), 1), 0) ++ = gen_rtx_PLUS (DImode, ++ gen_rtx_REG (DImode, REGNO (load_insn_2_plus_expr)), ++ GEN_INT (GET_MODE_SIZE (DImode))); ++ } ++ else ++ { ++ rtx load_insn_2_offset_expr ++ = XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1); ++ ++ if (load_insn_2_offset_expr == NULL) ++ return; ++ ++ if (GET_CODE (load_insn_2_offset_expr) == CONST_INT) ++ { ++ int load_insn_2_offset = XINT (load_insn_2_offset_expr, 0); ++ XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1) ++ = GEN_INT (load_insn_2_offset + GET_MODE_SIZE (DImode)); ++ } ++ } ++ ++ start_sequence (); ++ ++ int src_regno; ++ rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0); ++ ++ if (GET_CODE (srs_reg_insn) == REG) ++ src_regno = REGNO (srs_reg_insn); ++ else ++ src_regno = REGNO (XEXP (srs_reg_insn, 0)); ++ ++ /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. 
*/ ++ if (src_regno == reg_no_1) ++ std::swap (load_insn_1, load_insn_2); ++ ++ rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1)); ++ rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2)); ++ ++ INSN_CODE (emited_load_insn_1) ++ = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0); ++ INSN_CODE (emited_load_insn_2) ++ = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0); ++ ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ /* Put the new sequence where INSN was, then remove INSN. */ ++ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn)); ++ delete_insn_and_edges (insn); ++} ++/* Dispatch INSN to the splitter matching its type; any type other than LDP, STP or LDP_TI trips the assertion. */ ++void ++pass_split_complex_instructions::split_complex_insn (rtx_insn *insn) ++{ ++ complex_instructions_t insn_type = get_insn_type (insn); ++ /* TODO: Add splitting of STP instructions. */ ++ if (insn_type == LDP || insn_type == STP) ++ split_ldp_stp (insn); ++ else if (insn_type == LDP_TI) ++ split_ldp_ti (insn); ++ else ++ gcc_assert (false && "Unsupported type of insn to split"); ++} ++/* Classify INSN: a PARALLEL is LDP or STP according to the target hooks applied to its recognized insn code; a single_set whose source is a TImode MEM is LDP_TI; everything else, including non-insns, is UNDEFINED. */ ++pass_split_complex_instructions::complex_instructions_t ++pass_split_complex_instructions::get_insn_type (rtx_insn *insn) ++{ ++ if (!INSN_P (insn)) ++ return UNDEFINED; ++ ++ rtx pat = PATTERN (insn); ++ int icode = recog (PATTERN (insn), insn, NULL); ++ ++ if (GET_CODE (pat) == PARALLEL) ++ { ++ if (targetm.is_ldp_insn (icode)) ++ { ++ return LDP; ++ } ++ if (targetm.is_stp_insn (icode)) ++ { ++ return STP; ++ } ++ else ++ { ++ return UNDEFINED; ++ } ++ } ++ rtx set_insn = single_set (insn); /* NOTE(review): only the source is inspected below; the destination is not checked here — confirm it is always suitable for splitting. */ ++ if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM ++ && GET_MODE (XEXP (set_insn, 1)) == E_TImode) ++ return LDP_TI; ++ ++ return UNDEFINED; ++} ++/* Run the pass only when the target defines both hooks, optimization is enabled and flag_split_ldp_stp is set. */ ++bool ++pass_split_complex_instructions::gate (function *) ++{ ++ return targetm.is_ldp_insn && targetm.is_stp_insn && optimize > 0 ++ && flag_split_ldp_stp > 0; ++} ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_split_complex_instructions (gcc::context *ctxt) ++{ ++ return new pass_split_complex_instructions (ctxt);
++} ++ + #if __GNUC__ >= 10 + # pragma GCC diagnostic pop +-#endif ++#endif +\ No newline at end of file +diff --git a/gcc/target.def b/gcc/target.def +index d85adf36a..a3a50b474 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2677,6 +2677,16 @@ modes and they have different conditional execution capability, such as ARM.", + bool, (void), + default_have_conditional_execution) + ++DEFHOOK ++(is_ldp_insn, ++ "Return true if icode is corresponding to any of the LDP instruction types.", ++ bool, (int icode), NULL) ++ ++DEFHOOK ++(is_stp_insn, ++ "Return true if icode is corresponding to any of the STP instruction types.", ++ bool, (int icode), NULL) ++ + DEFHOOK + (gen_ccmp_first, + "This function prepares to emit a comparison insn for the first compare in a\n\ +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c +new file mode 100644 +index 000000000..3918d43f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c +@@ -0,0 +1,74 @@ ++/* { dg-do compile { target aarch64-*-* } } */ ++/* { dg-additional-options "-fsplit-ldp-stp" } */ ++/* ++ * Tests are: ++ * Patterns where LDP insns should NOT be split ++ * */ ++ ++int __RTL (startwith ("split_complex_instructions")) ++simple_ldp_after_store () ++{ ++(function "simple_ldp_after_store" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI sp) ++ (reg/i:DI x0))) ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI sp) ++ (const_int 32))[1 S4 A32])(reg:DI x0))) ++ (cinsn 10 (parallel [ ++ (set (reg:DI x29) ++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI sp) ++ (const_int 16)) [1 S4 A32]))])) ++ (cinsn 11 (use (reg/i:DI sp))) ++ (cinsn 12 (use (reg/i:DI cc))) ++ (cinsn 13 (use (reg/i:DI x29))) ++ (cinsn 14 (use (reg/i:DI x30))) ++ (cinsn 15 (use (reg/i:DI x0))) ++ (edge-to exit 
(flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++) ;; function "simple_ldp_after_store" ++} ++ ++int __RTL (startwith ("split_complex_instructions")) ++ldp_after_store_in_different_bb () ++{ ++(function "ldp_after_store_in_different_bb" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI sp) ++ (reg/i:DI x0))) ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI sp) ++ (const_int 32))[1 S4 A32])(reg:DI x0))) ++ (edge-to 3 (flags "FALLTHRU")) ++ ) ;; block 2 ++ (block 3 ++ (edge-from 2 (flags "FALLTHRU")) ++ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 10 (parallel [ ++ (set (reg:DI x29) ++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI sp) ++ (const_int 16)) [1 S4 A32]))])) ++ (cinsn 11 (use (reg/i:DI sp))) ++ (cinsn 12 (use (reg/i:DI cc))) ++ (cinsn 13 (use (reg/i:DI x29))) ++ (cinsn 14 (use (reg/i:DI x30))) ++ (cinsn 15 (use (reg/i:DI x0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 3 ++ ) ;; insn-chain ++) ;; function "ldp_after_store_in_different_bb" ++} ++ ++/* Verify that the output code contains exactly 2 ldp. */ ++/* { dg-final { scan-assembler-times {ldp\t} 2 } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c +new file mode 100644 +index 000000000..653c30f83 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile { target aarch64-*-* } } */ ++/* { dg-additional-options "-fsplit-ldp-stp" } */ ++/* ++ * Test is: ++ * Pattern where LDP insns should be split with rearrangement in order ++ * to deal with data dependecy betwen subinstruction. 
++ * */ ++ ++int __RTL (startwith ("split_complex_instructions")) ++simple_ldp_after_store () ++{ ++(function "ldp_equal_registers" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI x1) ++ (reg/i:DI x0))) ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI x1) ++ (const_int 8))[1 S4 A32])(reg:DI x0))) ++ (cinsn 10 (parallel [ ++ (set (reg:DI x1) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32])) ++ (set (reg:DI x2) ++ (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 16)) [1 S4 A32]))])) ++ (cinsn 11 (use (reg/i:DI sp))) ++ (cinsn 12 (use (reg/i:DI cc))) ++ (cinsn 13 (use (reg/i:DI x0))) ++ (cinsn 14 (use (reg/i:DI x1))) ++ (cinsn 15 (use (reg/i:DI x2))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++) ;; function "ldp_equal_registers" ++} ++ ++/* Verify that the output code rearrange ldrs. */ ++/* { dg-final { scan-assembler-times ".*ldr.*x2.*x1,.*16.*ldr.*x1.*x1.*8" 1 } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c +new file mode 100644 +index 000000000..dc9f26efb +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c +@@ -0,0 +1,174 @@ ++/* { dg-do compile { target aarch64-*-* } } */ ++/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */ ++/* ++ * Tests are: ++ * Patterns where LDP insns should be split ++ * */ ++ ++int __RTL (startwith ("split_complex_instructions")) ++simple_ldp_after_store () ++{ ++(function "simple_ldp_after_store" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI sp) ++ (reg/i:DI x0))) ++ (cinsn 238 (set (reg/i:DI x1) ++ (reg/i:DI x0))) ++ ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI sp) ++ (const_int 8))[1 S4 A32])(reg:DI x0))) ++ (cinsn 10 (parallel [ ++ (set (reg:DI x29) ++ (mem:DI (plus:DI (reg/f:DI sp) 
(const_int 8)) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI sp) ++ (const_int 16)) [1 S4 A32]))])) ++ ++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1) ++ (const_int -16)) [1 S4 A32]) ++ (reg:DI x0))) ++ (cinsn 11 (parallel [ ++ (set (reg:DI x3) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32])) ++ (set (reg:DI x4) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32])) ++ ])) ++ ++ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32]) ++ (reg:DI x0))) ++ (cinsn 12 (parallel [ ++ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32])) ++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 8)) [1 S4 A32])) ++ ])) ++ ++ (cinsn 13 (use (reg/i:DI sp))) ++ (cinsn 14 (use (reg/i:DI cc))) ++ (cinsn 15 (use (reg/i:DI x29))) ++ (cinsn 16 (use (reg/i:DI x30))) ++ (cinsn 17 (use (reg/i:DI x0))) ++ (cinsn 18 (use (reg/i:DI x3))) ++ (cinsn 19 (use (reg/i:DI x4))) ++ (cinsn 20 (use (reg/i:DI x5))) ++ (cinsn 21 (use (reg/i:DI x6))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++) ;; function "simple_ldp_after_store" ++} ++ ++int __RTL (startwith ("split_complex_instructions")) ++ldp_ti_after_store () ++{ ++ (function "ldp_ti_after_store" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI sp) ++ (reg/i:DI x0))) ++ (cinsn 238 (set (reg/i:DI x2) ++ (reg/i:DI x0))) ++ ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI sp) ++ (const_int 136))[1 S4 A32])(reg:DI x0))) ++ (insn 81 (set (reg:TI x0 [1 S4 A32]) ++ (mem/c:TI (plus:DI (reg/f:DI sp) ++ (const_int 136 )) [1 S4 A32])) ++ (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp) ++ (const_int -24 )) [1 S4 A32]) ++ (nil))) ++ ++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2) ++ (const_int -16)) [1 S4 A32]) ++ (reg:DI x0))) ++ (insn 82 (set (reg:TI x3 [1 S4 A32]) ++ (mem/c:TI (plus:DI (reg/f:DI x2) ++ (const_int -16)) [1 S4 A32]))) ++ ++ (cinsn 103 (set (mem/c:DI (reg/f:DI x2) 
[1 S4 A32]) ++ (reg:DI x0))) ++ (insn 83 (set (reg:TI x5 [1 S4 A32]) ++ (mem/c:TI (reg/f:DI x2) [1 S4 A32]))) ++ ++ (cinsn 11 (use (reg/i:DI sp))) ++ (cinsn 12 (use (reg/i:DI cc))) ++ (cinsn 13 (use (reg/i:DI x29))) ++ (cinsn 14 (use (reg/i:DI x30))) ++ (cinsn 15 (use (reg/i:DI x0))) ++ (cinsn 16 (use (reg/i:DI x3))) ++ (cinsn 17 (use (reg/i:DI x5))) ++ (cinsn 18 (use (reg/i:DI x1))) ++ (cinsn 19 (use (reg/i:DI x4))) ++ (cinsn 20 (use (reg/i:DI x6))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++) ;; function "ldp_ti_after_store" ++} ++ ++int __RTL (startwith ("split_complex_instructions")) ++ldp_after_store_in_different_bb () ++{ ++(function "ldp_after_store_in_different_bb" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 228 (set (reg/i:DI sp) ++ (reg/i:DI x0))) ++ (cinsn 238 (set (reg/i:DI x1) ++ (reg/i:DI x0))) ++ ++ (cinsn 101 (set (mem/c:DI ++ (plus:DI (reg/f:DI sp) ++ (const_int 8))[1 S4 A32])(reg:DI x0))) ++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1) ++ (const_int -16)) [1 S4 A32]) ++ (reg:DI x0))) ++ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32]) ++ (reg:DI x0))) ++ (edge-to 3 (flags "FALLTHRU")) ++ ) ;; block 2 ++ (block 3 ++ (edge-from 2 (flags "FALLTHRU")) ++ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK) ++ (cinsn 10 (parallel [ ++ (set (reg:DI x29) ++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI sp) ++ (const_int 16)) [1 S4 A32]))])) ++ (cinsn 11 (parallel [ ++ (set (reg:DI x3) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32])) ++ (set (reg:DI x4) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32])) ++ ])) ++ (cinsn 12 (parallel [ ++ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32])) ++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 8)) [1 S4 A32])) ++ ])) ++ (cinsn 13 (use (reg/i:DI sp))) ++ (cinsn 14 (use (reg/i:DI cc))) ++ (cinsn 15 (use 
(reg/i:DI x29))) ++ (cinsn 16 (use (reg/i:DI x30))) ++ (cinsn 17 (use (reg/i:DI x0))) ++ (cinsn 18 (use (reg/i:DI x3))) ++ (cinsn 19 (use (reg/i:DI x4))) ++ (cinsn 20 (use (reg/i:DI x5))) ++ (cinsn 21 (use (reg/i:DI x6))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 3 ++ ) ;; insn-chain ++) ;; function "ldp_after_store_in_different_bb" ++} ++ ++/* Verify that the output code doesn't contain ldp. */ ++/* { dg-final { scan-assembler-not {ldp\t} } } */ +\ No newline at end of file +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 1e7d4e74b..2ccecffb5 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -280,6 +280,7 @@ DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") + DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") + DEFTIMEVAR (TV_REE , "ree") + DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue") ++DEFTIMEVAR (TV_SPLIT_CMP_INS , "split complex instructions") + DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") + DEFTIMEVAR (TV_SPLIT_PATHS , "split paths") + DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 86f38e2f2..6daac7fc1 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -612,6 +612,7 @@ extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context + *ctxt); + extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt); ++extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt); +-- +2.33.0 + |