diff options
Diffstat (limited to '0043-Extending-and-refactoring-of-pass_split_complex_inst.patch')
-rw-r--r-- | 0043-Extending-and-refactoring-of-pass_split_complex_inst.patch | 1426 |
1 files changed, 1426 insertions, 0 deletions
diff --git a/0043-Extending-and-refactoring-of-pass_split_complex_inst.patch b/0043-Extending-and-refactoring-of-pass_split_complex_inst.patch new file mode 100644 index 0000000..509a534 --- /dev/null +++ b/0043-Extending-and-refactoring-of-pass_split_complex_inst.patch @@ -0,0 +1,1426 @@ +From a49db831320ac70ca8f46b94ee60d7c6951f65c3 Mon Sep 17 00:00:00 2001 +From: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com> +Date: Wed, 20 Dec 2023 21:36:07 +0300 +Subject: [PATCH 10/18] Extending and refactoring of + pass_split_complex_instructions + +- Add flag parameter in is_ldp_insn and is_stp_insn to know + if instruction has writeback operation +- Add support of PRE_*, POST_* operands as a memory address + expression +- Split only LDPs that intersect with a dependent store + instruction +- Make the selection of dependent store instructions stricter + so it will be enough to check by BFS that dependent store + instruction appears in search range. +- Add helper methods to retrieve fields of rtx +- Remove redundant iterations in find_dependent_stores_candidates +- Refactor generation of instructions +- Add more test cases +--- + gcc/config/aarch64/aarch64.cc | 62 +- + gcc/doc/tm.texi | 12 +- + gcc/sched-rgn.cc | 771 +++++++++--------- + gcc/target.def | 14 +- + .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 35 +- + .../rtl/aarch64/test-ldp-split-rearrange.c | 2 +- + .../gcc.dg/rtl/aarch64/test-ldp-split.c | 181 +++- + 7 files changed, 603 insertions(+), 474 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 48e2eded0..fa566dd80 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -27507,39 +27507,59 @@ aarch64_run_selftests (void) + + #endif /* #if CHECKING_P */ + +-/* TODO: refuse to use ranges intead of full list of an instruction codes. */ ++/* TODO: refuse to use ranges instead of full list of an instruction codes. */ + + bool +-is_aarch64_ldp_insn (int icode) ++is_aarch64_ldp_insn (int icode, bool *has_wb) + { + if ((icode >= CODE_FOR_load_pair_sw_sisi +- && icode <= CODE_FOR_load_pair_dw_tftf) ++ && icode <= CODE_FOR_load_pair_sw_sfsf) ++ || (icode >= CODE_FOR_load_pair_dw_didi ++ && icode <= CODE_FOR_load_pair_dw_dfdf) ++ || (icode == CODE_FOR_load_pair_dw_tftf) + || (icode >= CODE_FOR_loadwb_pairsi_si +- && icode <= CODE_FOR_loadwb_pairtf_di) +- || (icode >= CODE_FOR_load_pairv8qiv8qi +- && icode <= CODE_FOR_load_pairdfdf) +- || (icode >= CODE_FOR_load_pairv16qiv16qi +- && icode <= CODE_FOR_load_pairv8bfv2df) +- || (icode >= CODE_FOR_load_pair_lanesv8qi +- && icode <= CODE_FOR_load_pair_lanesdf)) +- return true; ++ && icode <= CODE_FOR_loadwb_pairdi_di) ++ || (icode >= CODE_FOR_loadwb_pairsf_si ++ && icode <= CODE_FOR_loadwb_pairdf_di) ++ || (icode >= CODE_FOR_loadwb_pairti_si ++ && icode <= CODE_FOR_loadwb_pairtf_di)) ++ { ++ if (has_wb) ++ *has_wb = ((icode >= CODE_FOR_loadwb_pairsi_si ++ && icode <= CODE_FOR_loadwb_pairdi_di) ++ || (icode >= CODE_FOR_loadwb_pairsf_si ++ && icode <= CODE_FOR_loadwb_pairdf_di) ++ || (icode >= CODE_FOR_loadwb_pairti_si ++ && icode <= CODE_FOR_loadwb_pairtf_di)); ++ return true; ++ } + return false; + } + + bool +-is_aarch64_stp_insn (int icode) ++is_aarch64_stp_insn (int icode, bool *has_wb) + { + if ((icode >= CODE_FOR_store_pair_sw_sisi +- && icode <= CODE_FOR_store_pair_dw_tftf) ++ && icode <= CODE_FOR_store_pair_sw_sfsf) ++ || (icode >= CODE_FOR_store_pair_dw_didi ++ && icode <= CODE_FOR_store_pair_dw_dfdf) ++ || (icode == CODE_FOR_store_pair_dw_tftf) + || (icode >= CODE_FOR_storewb_pairsi_si +- && icode <= CODE_FOR_storewb_pairtf_di) +- || (icode >= CODE_FOR_vec_store_pairv8qiv8qi +- && icode <= CODE_FOR_vec_store_pairdfdf) +- || (icode >= CODE_FOR_vec_store_pairv16qiv16qi +- && icode <= CODE_FOR_vec_store_pairv8bfv2df) +- || (icode >= CODE_FOR_store_pair_lanesv8qi +- && icode <= CODE_FOR_store_pair_lanesdf)) +- return true; ++ && icode <= CODE_FOR_storewb_pairdi_di) ++ || (icode >= CODE_FOR_storewb_pairsf_si ++ && icode <= CODE_FOR_storewb_pairdf_di) ++ || (icode >= CODE_FOR_storewb_pairti_si ++ && icode <= CODE_FOR_storewb_pairtf_di)) ++ { ++ if (has_wb) ++ *has_wb = ((icode >= CODE_FOR_storewb_pairsi_si ++ && icode <= CODE_FOR_storewb_pairdi_di) ++ || (icode >= CODE_FOR_storewb_pairsf_si ++ && icode <= CODE_FOR_storewb_pairdf_di) ++ || (icode >= CODE_FOR_storewb_pairti_si ++ && icode <= CODE_FOR_storewb_pairtf_di)); ++ return true; ++ } + return false; + } + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 0c6415a9c..3b6e90bf2 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -12113,12 +12113,16 @@ object files that are not referenced from @code{main} and uses export + lists. + @end defmac + +-@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}) +-Return true if icode is corresponding to any of the LDP instruction types. ++@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}, bool *@var{has_wb}) ++Return true if @var{icode} is corresponding to any of the LDP instruction ++types. If @var{has_wb} is not NULL then its value is set to true if LDP ++contains post-index or pre-index operation. + @end deftypefn + +-@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}) +-Return true if icode is corresponding to any of the STP instruction types. ++@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}, bool *@var{has_wb}) ++Return true if @var{icode} is corresponding to any of the STP instruction ++types. If @var{has_wb} is not NULL then its value is set to true if STP ++contains post-index or pre-index operation. + @end deftypefn + + @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void) +diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc +index b4df8bdc5..5f61de1c8 100644 +--- a/gcc/sched-rgn.cc ++++ b/gcc/sched-rgn.cc +@@ -3956,7 +3956,7 @@ make_pass_sched_fusion (gcc::context *ctxt) + + namespace { + +-/* Def-use analisys special functions implementation. */ ++/* Def-use analysis special functions implementation. */ + + static struct df_link * + get_defs (rtx_insn *insn, rtx reg) +@@ -4032,42 +4032,66 @@ const pass_data pass_data_split_complex_instructions = { + (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish. */ + }; + ++/* Pass split_complex_instructions finds LOAD PAIR instructions (LDP) that can ++ be split into two LDR instructions. It splits only those LDP for which one ++ half of the requested memory is contained in the preceding STORE (STR/STP) ++ instruction whose base register has the same definition. This allows ++ to use hardware store-to-load forwarding mechanism and to get one half of ++ requested memory from the store queue of CPU. ++ ++ TODO: Add split of STP. ++ TODO: Add split of vector STP and LDP. */ + class pass_split_complex_instructions : public rtl_opt_pass + { + private: +- enum complex_instructions_t ++ enum mem_access_insn_t + { + UNDEFINED, + LDP, ++ /* LDP with post-index (see loadwb_pair in config/aarch64.md). */ ++ LDP_WB, ++ /* LDP that contains one destination register in RTL IR ++ (see movti_aarch64 in config/aarch64.md). */ + LDP_TI, + STP, ++ /* STP with pre-index (see storewb_pair in config/aarch64.md). */ ++ STP_WB, ++ /* STP that contains one source register in RTL IR ++ (see movti_aarch64 in config/aarch64.md). */ ++ STP_TI, + STR + }; + +- void split_complex_insn (rtx_insn *insn); +- void split_ldp_ti (rtx_insn *insn); +- void split_ldp_with_offset (rtx_insn *ldp_insn); +- void split_simple_ldp (rtx_insn *ldp_insn); +- void split_ldp_stp (rtx_insn *insn); +- complex_instructions_t get_insn_type (rtx_insn *insn); +- +- basic_block bb; +- rtx_insn *insn; + std::set<rtx_insn *> dependent_stores_candidates; + std::set<rtx_insn *> ldp_to_split_list; + +- complex_instructions_t complex_insn_type = UNDEFINED; +- bool is_store_insn (rtx_insn *insn); +- bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb); ++ void split_ldp_ti (rtx_insn *insn); ++ void split_ldp (rtx_insn *ldp_insn); ++ /* Emit a NEW_INSNS chain, recognize instruction code of each new instruction ++ and replace OLD_INSN with the emitted sequence. */ ++ void replace_insn (rtx_insn *old_insn, rtx_insn *new_insns); ++ ++ mem_access_insn_t get_insn_type (rtx_insn *insn); ++ bool is_typeof_ldp (mem_access_insn_t insn_type); ++ bool is_typeof_stp (mem_access_insn_t insn_type); ++ + bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb, + rtx_insn *search_insn, + int search_range + = param_ldp_dependency_search_range); + bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn); + void init_df (); +- void find_dependent_stores_candidates (rtx_insn *ldp_insn); +- int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type, +- int *arith_operation_ptr = NULL); ++ void find_dependent_stores_candidates (rtx_insn *ldp_insn, ++ mem_access_insn_t insn_type); ++ ++ rtx get_memref (rtx_insn *insn, mem_access_insn_t insn_type); ++ rtx get_base_reg (rtx memref); ++ /* Set OFFSET to the offset value. Returns TRUE if MEMREF's address ++ expression is supported, FALSE otherwise. */ ++ bool get_offset (rtx memref, int &offset); ++ /* Return size of memory referenced by MEMREF. Returns -1 if INSN_TYPE ++ wasn't recognized. */ ++ int get_unit_size (rtx memref, mem_access_insn_t insn_type); + + public: + pass_split_complex_instructions (gcc::context *ctxt) +@@ -4080,28 +4104,22 @@ public: + virtual unsigned int + execute (function *) + { +- enum rtx_code ldp_memref_code; ++ basic_block bb; ++ rtx_insn *insn; ++ + init_df (); + ldp_to_split_list.clear (); + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { +- complex_instructions_t insn_type = get_insn_type (insn); +- /* TODO: Add splitting of STP instructions. */ +- if (insn_type != LDP && insn_type != LDP_TI) ++ mem_access_insn_t insn_type = get_insn_type (insn); ++ if (!is_typeof_ldp (insn_type)) + continue; +- /* TODO: Currently support only ldp_ti and ldp with REG or +- PLUS/MINUS offset expression. */ +- if (insn_type == LDP_TI) +- { +- ldp_memref_code = GET_CODE (XEXP (XEXP (PATTERN (insn), 1), +- 0)); +- if (ldp_memref_code != REG && ldp_memref_code != PLUS +- && ldp_memref_code != MINUS) +- continue; +- } +- if (is_ldp_dependent_on_store (insn, bb)) ++ ++ find_dependent_stores_candidates (insn, insn_type); ++ if (!dependent_stores_candidates.empty () ++ && bfs_for_reg_dependent_store (insn, bb, insn)) + { + ldp_to_split_list.insert (insn); + } +@@ -4110,18 +4128,107 @@ public: + + for (std::set<rtx_insn *>::iterator i = ldp_to_split_list.begin (); + i != ldp_to_split_list.end (); ++i) +- split_complex_insn (*i); ++ split_ldp (*i); + + return 0; + } + }; // class pass_split_complex_instructions + + bool +-pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn, +- basic_block bb) ++pass_split_complex_instructions::is_typeof_ldp ( ++ mem_access_insn_t insn_type) + { +- find_dependent_stores_candidates (ldp_insn); +- return bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn); ++ return (insn_type == LDP || insn_type == LDP_WB || insn_type == LDP_TI); ++} ++ ++bool ++pass_split_complex_instructions::is_typeof_stp ( ++ mem_access_insn_t insn_type) ++{ ++ return (insn_type == STP || insn_type == STP_WB || insn_type == STP_TI); ++} ++ ++rtx ++pass_split_complex_instructions::get_memref ( ++ rtx_insn *insn, mem_access_insn_t insn_type) ++{ ++ rtx insn_pat = PATTERN (insn); ++ rtx memref = NULL; ++ ++ switch (insn_type) ++ { ++ case LDP: ++ memref = SET_SRC (XVECEXP (insn_pat, 0, 0)); ++ break; ++ case LDP_WB: ++ memref = SET_SRC (XVECEXP (insn_pat, 0, 1)); ++ break; ++ case LDP_TI: ++ memref = SET_SRC (insn_pat); ++ break; ++ case STP: ++ memref = SET_DEST (XVECEXP (insn_pat, 0, 0)); ++ break; ++ case STP_WB: ++ memref = SET_DEST (XVECEXP (insn_pat, 0, 1)); ++ break; ++ case STP_TI: ++ case STR: ++ memref = SET_DEST (insn_pat); ++ break; ++ default: ++ break; ++ } ++ ++ if (memref && !MEM_P (memref)) ++ return NULL; ++ return memref; ++} ++ ++rtx ++pass_split_complex_instructions::get_base_reg (rtx memref) ++{ ++ if (!memref || !MEM_P (memref)) ++ return NULL; ++ rtx addr_exp = XEXP (memref, 0); ++ ++ switch (GET_CODE (addr_exp)) ++ { ++ case REG: ++ return addr_exp; ++ case PLUS: ++ case PRE_DEC: ++ case PRE_INC: ++ case POST_DEC: ++ case POST_INC: ++ if (REG_P (XEXP (addr_exp, 0))) ++ return XEXP (addr_exp, 0); ++ default: ++ return NULL; ++ } ++} ++ ++int ++pass_split_complex_instructions::get_unit_size ( ++ rtx memref, mem_access_insn_t insn_type) ++{ ++ if (!memref) ++ return -1; ++ ++ switch (insn_type) ++ { ++ case LDP: ++ case STP: ++ case LDP_WB: ++ case STP_WB: ++ case STR: ++ return GET_MODE_SIZE (GET_MODE (memref)).to_constant (); ++ case LDP_TI: ++ case STP_TI: ++ return GET_MODE_SIZE (E_DImode).to_constant (); ++ default: ++ return -1; ++ } + } + + bool +@@ -4135,9 +4242,9 @@ pass_split_complex_instructions::bfs_for_reg_dependent_store ( + { + if (!current_search_insn) + return false; +- bool checking_result +- = is_store_reg_dependent (ldp_insn, current_search_insn); +- if (checking_result) ++ ++ if (dependent_stores_candidates.find (current_search_insn) ++ != dependent_stores_candidates.end ()) + { + if (dump_file) + { +@@ -4185,30 +4292,29 @@ pass_split_complex_instructions::init_df () + + void + pass_split_complex_instructions::find_dependent_stores_candidates ( +- rtx_insn *ldp_insn) ++ rtx_insn *ldp_insn, mem_access_insn_t insn_type) + { + dependent_stores_candidates.clear (); +- df_ref use; + +- FOR_EACH_INSN_USE (use, ldp_insn) +- { +- df_link *defs = get_defs (ldp_insn, DF_REF_REG (use)); +- if (!defs) +- return; ++ rtx base_reg = get_base_reg (get_memref (ldp_insn, insn_type)); ++ if (!base_reg) ++ return; + +- for (df_link *def = defs; def; def = def->next) +- { +- df_link *uses +- = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref)); +- if (!uses) +- continue; ++ df_link *defs = get_defs (ldp_insn, base_reg); ++ if (!defs) ++ return; + +- for (df_link *use = uses; use; use = use->next) +- { +- if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR +- && is_store_insn (DF_REF_INSN (use->ref))) +- dependent_stores_candidates.insert (DF_REF_INSN (use->ref)); +- } ++ for (df_link *def = defs; def; def = def->next) ++ { ++ df_link *uses = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref)); ++ if (!uses) ++ continue; ++ for (df_link *use = uses; use; use = use->next) ++ { ++ if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR ++ && DF_REF_INSN (use->ref) != ldp_insn ++ && is_store_reg_dependent (ldp_insn, DF_REF_INSN (use->ref))) ++ dependent_stores_candidates.insert (DF_REF_INSN (use->ref)); + } + } + } +@@ -4217,423 +4323,274 @@ bool + pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn, + rtx_insn *str_insn) + { +- if (!is_store_insn (str_insn) +- || dependent_stores_candidates.find (str_insn) +- == dependent_stores_candidates.end ()) ++ if (!str_insn) + return false; + +- int ldp_offset_sign = UNDEFINED; +- int ldp_offset +- = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_offset_sign); +- if (ldp_offset_sign == MINUS) +- ldp_offset = -ldp_offset; ++ mem_access_insn_t st_type = get_insn_type (str_insn); ++ if (!is_typeof_stp (st_type) && st_type != STR) ++ return false; + +- int str_offset_sign = UNDEFINED; +- int str_offset = get_insn_offset (str_insn, STR, &str_offset_sign); +- if (str_offset_sign == MINUS) +- str_offset = -str_offset; ++ mem_access_insn_t ld_type = get_insn_type (ldp_insn); ++ rtx ld_memref = get_memref (ldp_insn, ld_type); ++ rtx st_memref = get_memref (str_insn, st_type); ++ rtx ld_base_reg = get_base_reg (ld_memref); ++ rtx st_base_reg = get_base_reg (st_memref); + +- if (str_offset == ldp_offset || str_offset == ldp_offset + 8) +- return true; ++ if (!ld_base_reg || !st_base_reg ++ || REGNO (ld_base_reg) != REGNO (st_base_reg)) ++ return false; + +- return false; +-} ++ int ld_offset = 0; ++ int st_offset = 0; ++ if (get_offset (ld_memref, ld_offset) ++ && get_offset (st_memref, st_offset)) ++ { ++ int ld_unit_size = get_unit_size (ld_memref, ld_type); ++ int st_size = get_unit_size (st_memref, st_type); ++ if (st_type != STR) ++ st_size *= 2; + +-bool +-pass_split_complex_instructions::is_store_insn (rtx_insn *insn) +-{ +- if (!insn) +- return false; +- rtx sset_b = single_set (insn); +- /* TODO: The condition below allow to take only store instructions in which +- the memory location's operand is either a register (base) or an plus/minus +- operation (base + #imm). So it might make sense to add support for other +- cases (e.g. multiply and shift). */ +- if (sset_b && MEM_P (SET_DEST (sset_b)) +- && GET_MODE (XEXP (sset_b, 0)) != BLKmode +- && (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == REG +- || (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == PLUS +- || GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == MINUS) +- && (GET_CODE (XEXP (XEXP (XEXP (sset_b, 0), 0), 1)) == CONST_INT))) +- return true; ++ if (ld_unit_size < 0 || st_size < 0) ++ return false; ++ ++ bool st_has_low_ld_part = (ld_offset >= st_offset ++ && (ld_offset + ld_unit_size <= st_offset + st_size)); ++ bool st_has_high_ld_part = ((ld_offset + ld_unit_size >= st_offset) ++ && (ld_offset + 2 * ld_unit_size <= st_offset + st_size)); ++ bool st_has_not_full_ld = (ld_offset < st_offset ++ || (ld_offset + 2 * ld_unit_size > st_offset + st_size)); ++ ++ if ((st_has_low_ld_part || st_has_high_ld_part) && st_has_not_full_ld) ++ return true; ++ } + + return false; + } + +-int +-pass_split_complex_instructions::get_insn_offset ( +- rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr) ++bool ++pass_split_complex_instructions::get_offset (rtx memref, int &offset) + { +- rtx insn_pat = PATTERN (insn); +- int returned_offset = 0; ++ rtx addr_exp = XEXP (memref, 0); + +- rtx offset_expr = NULL; +- rtx offset_value_expr = NULL; +- +- switch (insn_type) ++ switch (GET_CODE (addr_exp)) + { +- case LDP: +- { +- int number_of_sub_insns = XVECLEN (insn_pat, 0); +- +- /* Calculate it's own ofsset of first load insn. */ +- rtx_insn *first_load_insn = NULL; +- if (number_of_sub_insns == 2) ++ case REG: ++ case POST_DEC: ++ case POST_INC: ++ offset = 0; ++ return true; ++ case PRE_DEC: ++ offset = -(GET_MODE_SIZE (GET_MODE (memref)).to_constant ()); ++ return true; ++ case PRE_INC: ++ offset = GET_MODE_SIZE (GET_MODE (memref)).to_constant (); ++ return true; ++ case PLUS: ++ if (CONST_INT_P (XEXP (addr_exp, 1))) + { +- first_load_insn +- = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0))); +- arith_operation_ptr = NULL; +- +- offset_expr = XEXP (XEXP (PATTERN (first_load_insn), 1), 0); +- if (GET_CODE (offset_expr) == PLUS +- || GET_CODE (offset_expr) == MINUS) +- offset_value_expr +- = XEXP (XEXP (XEXP (PATTERN (first_load_insn), 1), 0), 1); +- else +- offset_expr = NULL; ++ offset = INTVAL (XEXP (addr_exp, 1)); ++ return true; + } +- else if (number_of_sub_insns == 3) +- { +- rtx_insn *offset_sub_insn +- = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0))); +- +- offset_expr = XEXP (PATTERN (offset_sub_insn), 1); +- offset_value_expr = XEXP (XEXP (PATTERN (offset_sub_insn), 1), 1); +- } +- else +- { +- gcc_assert (false +- && "Wrong number of elements in the ldp_insn vector"); +- } +- break; +- } +- case LDP_TI: +- { +- offset_expr = XEXP (XEXP (insn_pat, 1), 0); +- if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS) +- return 0; +- offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 1), 0), 1); +- break; +- } +- case STR: +- { +- offset_expr = XEXP (XEXP (insn_pat, 0), 0); +- /* If memory location is specified by single base register then the +- offset is zero. */ +- if (GET_CODE (offset_expr) == REG) +- return 0; +- offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 0), 0), 1); +- break; +- } +- default: +- { +- if (dumps_are_enabled && dump_file) +- { +- fprintf (dump_file, "Instruction that was tried to split:\n"); +- print_rtl_single (dump_file, insn); +- } +- gcc_assert (false && "Unsupported instruction type"); +- break; +- } +- } +- +- if (offset_expr != NULL && offset_value_expr +- && GET_CODE (offset_value_expr) == CONST_INT) +- returned_offset = XINT (offset_value_expr, 0); +- +- if (arith_operation_ptr != NULL) +- { +- *arith_operation_ptr = GET_CODE (offset_expr); +- gcc_assert ((*arith_operation_ptr == MINUS +- || *arith_operation_ptr == PLUS) +- && "Unexpected arithmetic operation in the offset expr"); ++ default: ++ return false; + } +- +- return returned_offset; + } + + void +-pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn) ++pass_split_complex_instructions::replace_insn (rtx_insn *old_insn, ++ rtx_insn *new_insns) + { +- rtx pat = PATTERN (ldp_insn); +- +- rtx_insn *mem_insn_1 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 0))); +- rtx_insn *mem_insn_2 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 1))); +- +- int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1))); +- int src_regno; +- +- rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0); +- +- if (GET_CODE (srs_reg_insn) == REG) +- src_regno = REGNO (srs_reg_insn); +- else +- src_regno = REGNO (XEXP (srs_reg_insn, 0)); +- +- rtx_insn *emited_insn_1, *emited_insn_2; ++ rtx_insn *prev_insn = PREV_INSN (old_insn); ++ start_sequence (); + +- /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */ +- if (src_regno == dest_regno) +- std::swap (mem_insn_1, mem_insn_2); ++ emit_insn (new_insns); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Split LDP:\n"); ++ print_rtl_single (dump_file, old_insn); ++ fprintf (dump_file, "Split into:\n"); ++ } + +- emited_insn_1 = emit_insn (PATTERN (mem_insn_1)); +- emited_insn_2 = emit_insn (PATTERN (mem_insn_2)); ++ for (rtx_insn *insn = new_insns; insn; insn = NEXT_INSN (insn)) ++ { ++ INSN_CODE (insn) = recog (PATTERN (insn), insn, NULL); ++ if (dump_file) ++ { ++ print_rtl_single (dump_file, insn); ++ } ++ } + +- int sub_insn_1_code = recog (PATTERN (mem_insn_1), mem_insn_1, 0); +- int sub_insn_2_code = recog (PATTERN (mem_insn_2), mem_insn_2, 0); ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); + +- INSN_CODE (emited_insn_1) = sub_insn_1_code; +- INSN_CODE (emited_insn_2) = sub_insn_2_code; ++ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (old_insn)); ++ delete_insn_and_edges (old_insn); + } + + void +-pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn) ++pass_split_complex_instructions::split_ldp (rtx_insn *ldp_insn) + { + rtx pat = PATTERN (ldp_insn); +- bool post_index = true; +- +- rtx_insn offset_insn; +- rtx_insn mem_insn_1; +- rtx_insn mem_insn_2; ++ mem_access_insn_t insn_type = get_insn_type (ldp_insn); ++ gcc_assert (is_typeof_ldp (insn_type)); + +- int offset_insn_code; +- int mem_insn_1_code = -1; +- int mem_insn_2_code = -1; ++ rtx load_rtx_1 = NULL; ++ rtx load_rtx_2 = NULL; ++ rtx post_index_rtx = NULL; + +- int offset = 0; +- int arith_operation = UNDEFINED; +- +- for (int i = 0; i < 3; i++) ++ switch (insn_type) + { +- rtx sub_insn = XVECEXP (pat, 0, i); +- rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn)); +- int sub_insn_code +- = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0); +- +- /* If sub_insn is offset related. */ +- if (GET_RTX_CLASS (sub_insn_code) == RTX_UNARY) +- { +- offset_insn = *copy_of_sub_insn; +- offset_insn_code = sub_insn_code; +- gcc_assert (i == 0 +- && "Offset related insn must be the first " +- "element of a parallel insn vector"); +- +- offset = get_insn_offset (ldp_insn, LDP, &arith_operation); +- } +- else +- { +- if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG) +- { +- rtx &offset_expr +- = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1); +- if (GET_CODE (offset_expr) == CONST_INT) +- { +- int local_offset = XINT (offset_expr, 0); +- offset = (arith_operation == PLUS ? offset : -offset); +- +- offset_expr = GEN_INT (local_offset + offset); +- +- gcc_assert ( +- (arith_operation == MINUS || arith_operation == PLUS) +- && "Unexpected arithmetic operation in offset related " +- "sub_insn"); +- +- if (i == 1) +- post_index = false; +- } +- else +- { +- post_index = true; +- } +- } +- } +- if (i == 1) +- { +- mem_insn_1 = *copy_of_sub_insn; +- mem_insn_1_code = sub_insn_code; +- } +- if (i == 2) +- { +- mem_insn_2 = *copy_of_sub_insn; +- mem_insn_2_code = sub_insn_code; +- } ++ case LDP: ++ load_rtx_1 = copy_rtx (XVECEXP (pat, 0, 0)); ++ load_rtx_2 = copy_rtx (XVECEXP (pat, 0, 1)); ++ break; ++ case LDP_WB: ++ post_index_rtx = copy_rtx (XVECEXP (pat, 0, 0)); ++ load_rtx_1 = copy_rtx (XVECEXP (pat, 0, 1)); ++ load_rtx_2 = copy_rtx (XVECEXP (pat, 0, 2)); ++ break; ++ case LDP_TI: ++ split_ldp_ti (ldp_insn); ++ return; ++ default: ++ return; + } +- gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1 +- && "Uninitialized memory insns"); + +- int dest_regno = REGNO (SET_DEST (PATTERN (&mem_insn_1))); +- int src_regno; +- +- rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (&mem_insn_1)), 0); +- +- if (GET_CODE (srs_reg_insn) == REG) +- src_regno = REGNO (srs_reg_insn); +- else +- src_regno = REGNO (XEXP (srs_reg_insn, 0)); ++ int dest_regno = REGNO (SET_DEST (load_rtx_1)); ++ int base_regno = REGNO (get_base_reg (get_memref (ldp_insn, insn_type))); + +- /* Don't split such weird LDP. */ +- if (src_regno == dest_regno) +- return; +- +- rtx_insn *emited_offset_insn; +- if (!post_index) ++ /* In cases like ldp r1,r2,[r1[, #imm]] emit ldr r2,[r1[, #imm]] first. ++ For LDP with post-index don't split such instruction. */ ++ if (base_regno == dest_regno) + { +- emited_offset_insn = emit_insn (PATTERN (&offset_insn)); +- INSN_CODE (emited_offset_insn) = offset_insn_code; ++ if (insn_type == LDP) ++ std::swap (load_rtx_1, load_rtx_2); ++ else ++ return; + } + +- rtx_insn *emited_insn_1 = emit_insn (PATTERN (&mem_insn_1)); +- rtx_insn *emited_insn_2 = emit_insn (PATTERN (&mem_insn_2)); +- +- +- INSN_CODE (emited_insn_1) = mem_insn_1_code; +- INSN_CODE (emited_insn_2) = mem_insn_2_code; +- +- if (post_index) ++ /* Construct the instruction chain for subsequent emitting. */ ++ rtx_insn *insn_seq = make_insn_raw (load_rtx_1); ++ rtx_insn *load_insn_2 = make_insn_raw (load_rtx_2); ++ SET_NEXT_INSN (insn_seq) = load_insn_2; ++ SET_NEXT_INSN (load_insn_2) = NULL; ++ if (post_index_rtx) + { +- emited_offset_insn = emit_insn (PATTERN (&offset_insn)); +- INSN_CODE (emited_offset_insn) = offset_insn_code; ++ rtx_insn *post_index_insn = make_insn_raw (post_index_rtx); ++ SET_NEXT_INSN (load_insn_2) = post_index_insn; ++ SET_NEXT_INSN (post_index_insn) = NULL; + } +-} +- +-void +-pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn) +-{ +- rtx_insn *prev_insn = PREV_INSN (insn); +- int number_of_sub_insns = XVECLEN (PATTERN (insn), 0); +- +- start_sequence (); + +- if (number_of_sub_insns == 2) +- split_simple_ldp (insn); +- else if (number_of_sub_insns == 3) +- split_ldp_with_offset (insn); +- else +- gcc_assert (false && "Broken complex insn vector"); +- +- rtx_insn *seq = get_insns (); +- unshare_all_rtl_in_chain (seq); +- end_sequence (); +- +- emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn)); +- delete_insn_and_edges (insn); ++ replace_insn (ldp_insn, insn_seq); + } + + void + pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn) + { +- rtx_insn *prev_insn = PREV_INSN (insn); +- rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn))); +- rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn))); +- +- rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0); +- rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1); +- rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1); +- +- PUT_MODE (mem_insn_1, DImode); +- PUT_MODE (mem_insn_2, DImode); +- +- int reg_no_1 = REGNO (reg_insn_1); ++ rtx pat = PATTERN (insn); ++ rtx memref = get_memref (insn, LDP_TI); ++ int unit_size = get_unit_size (memref, LDP_TI); ++ rtx base_reg = get_base_reg (memref); ++ rtx dest_reg = SET_DEST (pat); ++ ++ rtx reg_index_rtx = NULL; ++ rtx load_rtx_1 = NULL; ++ rtx load_rtx_2 = NULL; ++ bool post_index = false; ++ int offset = 0; + +- XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1); +- XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1); ++ rtx load_1_memref = gen_rtx_MEM (DImode, base_reg); + +- rtx load_insn_2_plus_expr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0); +- if (GET_CODE (load_insn_2_plus_expr) == REG) ++ rtx addr_expr = XEXP (memref, 0); ++ if (GET_CODE (addr_expr) == PLUS) + { +- XEXP (XEXP (PATTERN (load_insn_2), 1), 0) +- = gen_rtx_PLUS (DImode, +- gen_rtx_REG (DImode, REGNO (load_insn_2_plus_expr)), +- GEN_INT (GET_MODE_SIZE (DImode))); ++ offset = INTVAL (XEXP (addr_expr, 1)); ++ XEXP (load_1_memref, 0) = gen_rtx_PLUS (DImode, base_reg, ++ GEN_INT (offset)); + } +- else +- { +- rtx load_insn_2_offset_expr +- = XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1); + +- if (load_insn_2_offset_expr == NULL) +- return; +- +- if (GET_CODE (load_insn_2_offset_expr) == CONST_INT) +- { +- int load_insn_2_offset = XINT (load_insn_2_offset_expr, 0); +- XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1) +- = GEN_INT (load_insn_2_offset + GET_MODE_SIZE (DImode)); +- } +- } +- +- start_sequence (); ++ rtx load_2_memref = gen_rtx_MEM (DImode, ++ gen_rtx_PLUS (DImode, base_reg, GEN_INT (offset + unit_size))); + +- int src_regno; +- rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0); ++ load_rtx_1 = gen_rtx_SET (gen_rtx_REG (DImode, REGNO (dest_reg)), ++ load_1_memref); ++ load_rtx_2 = gen_rtx_SET (gen_rtx_REG (DImode, REGNO (dest_reg) + 1), ++ load_2_memref); + +- if (GET_CODE (srs_reg_insn) == REG) +- src_regno = REGNO (srs_reg_insn); +- else +- src_regno = REGNO (XEXP (srs_reg_insn, 0)); ++ if (GET_CODE (addr_expr) == PRE_INC || GET_CODE (addr_expr) == PRE_DEC ++ || GET_CODE (addr_expr) == POST_INC || GET_CODE (addr_expr) == POST_DEC) ++ { ++ /* The amount of increment or decrement is equal to size of ++ machine-mode of the containing MEMREF (see rtl.def). */ ++ int index_offset = GET_MODE_SIZE (GET_MODE (memref)).to_constant (); + +- /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */ +- if (src_regno == reg_no_1) +- std::swap (load_insn_1, load_insn_2); ++ if (GET_CODE (addr_expr) == PRE_DEC || GET_CODE (addr_expr) == POST_DEC) ++ index_offset = -index_offset; + +- rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1)); +- rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2)); ++ if (GET_CODE (addr_expr) == POST_INC || GET_CODE (addr_expr) == POST_DEC) ++ post_index = true; + +- INSN_CODE (emited_load_insn_1) +- = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0); +- INSN_CODE (emited_load_insn_2) +- = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0); ++ reg_index_rtx = gen_rtx_SET (base_reg, ++ gen_rtx_PLUS (DImode, base_reg, ++ GEN_INT (index_offset))); ++ } + +- rtx_insn *seq = get_insns (); +- unshare_all_rtl_in_chain (seq); +- end_sequence (); ++ /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */ ++ if (REGNO (base_reg) == REGNO (dest_reg)) ++ std::swap (load_rtx_1, load_rtx_2); + +- emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn)); +- delete_insn_and_edges (insn); +-} ++ /* Construct the instruction chain for subsequent emitting. */ ++ rtx_insn *insn_seq = make_insn_raw (load_rtx_1); ++ rtx_insn *load_insn_2 = make_insn_raw (load_rtx_2); ++ SET_NEXT_INSN (insn_seq) = load_insn_2; ++ SET_NEXT_INSN (load_insn_2) = NULL; ++ if (post_index && reg_index_rtx) ++ { ++ rtx_insn *post_index_insn = make_insn_raw (reg_index_rtx); ++ SET_NEXT_INSN (load_insn_2) = post_index_insn; ++ SET_NEXT_INSN (post_index_insn) = NULL; ++ } ++ else if (!post_index && reg_index_rtx) ++ { ++ rtx_insn *pre_index = make_insn_raw (reg_index_rtx); ++ SET_NEXT_INSN (pre_index) = insn_seq; ++ insn_seq = pre_index; ++ } + +-void +-pass_split_complex_instructions::split_complex_insn (rtx_insn *insn) +-{ +- complex_instructions_t insn_type = get_insn_type (insn); +- /* TODO: Add splitting of STP instructions. */ +- if (insn_type == LDP || insn_type == STP) +- split_ldp_stp (insn); +- else if (insn_type == LDP_TI) +- split_ldp_ti (insn); +- else +- gcc_assert (false && "Unsupported type of insn to split"); ++ replace_insn (insn, insn_seq); + } + +-pass_split_complex_instructions::complex_instructions_t ++pass_split_complex_instructions::mem_access_insn_t + pass_split_complex_instructions::get_insn_type (rtx_insn *insn) + { + if (!INSN_P (insn)) + return UNDEFINED; + +- rtx pat = PATTERN (insn); +- int icode = recog (PATTERN (insn), insn, NULL); ++ int icode = INSN_CODE (insn); ++ if (icode == -1) ++ icode = recog (PATTERN (insn), insn, 0); ++ bool has_wb = false; ++ ++ if (targetm.is_ldp_insn (icode, &has_wb)) ++ return (has_wb ? LDP_WB : LDP); + +- if (GET_CODE (pat) == PARALLEL) ++ if (targetm.is_stp_insn (icode, &has_wb)) ++ return (has_wb ? STP_WB : STP); ++ ++ rtx set_insn = single_set (insn); ++ if (set_insn && (GET_MODE (SET_SRC (set_insn)) == E_TImode ++ || GET_MODE (SET_DEST (set_insn)) == E_TImode)) + { +- if (targetm.is_ldp_insn (icode)) +- { +- return LDP; +- } +- if (targetm.is_stp_insn (icode)) +- { +- return STP; +- } +- else +- { +- return UNDEFINED; +- } ++ if (MEM_P (SET_SRC (set_insn)) && REG_P (SET_DEST (set_insn))) ++ return LDP_TI; ++ if (MEM_P (SET_DEST (set_insn)) && REG_P (SET_SRC (set_insn))) ++ return STP_TI; + } +- rtx set_insn = single_set (insn); +- if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM +- && GET_MODE (XEXP (set_insn, 1)) == E_TImode) +- return LDP_TI; ++ ++ if (set_insn && MEM_P (SET_DEST (set_insn)) && REG_P (SET_SRC (set_insn)) ++ && GET_MODE (SET_DEST (set_insn)) != BLKmode) ++ return STR; + + return UNDEFINED; + } +diff --git a/gcc/target.def b/gcc/target.def +index a3a50b474..8797a21d5 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2679,13 +2679,19 @@ modes and they have different conditional execution capability, such as ARM.", + + DEFHOOK + (is_ldp_insn, +- "Return true if icode is corresponding to any of the LDP instruction types.", +- bool, (int icode), NULL) ++ "Return true if @var{icode} is corresponding to any of the LDP instruction\n\ ++types. If @var{has_wb} is not NULL then its value is set to true if LDP\n\ ++contains post-index or pre-index operation.", ++ bool, (int icode, bool *has_wb), ++ NULL) + + DEFHOOK + (is_stp_insn, +- "Return true if icode is corresponding to any of the STP instruction types.", +- bool, (int icode), NULL) ++ "Return true if @var{icode} is corresponding to any of the STP instruction\n\ ++types. If @var{has_wb} is not NULL then its value is set to true if STP\n\ ++contains post-index or pre-index operation.", ++ bool, (int icode, bool *has_wb), ++ NULL) + + DEFHOOK + (gen_ccmp_first, +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c +index 3918d43f6..2d42231dc 100644 +--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target aarch64-*-* } } */ +-/* { dg-additional-options "-fsplit-ldp-stp" } */ ++/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */ + /* + * Tests are: + * Patterns where LDP insns should NOT be split +@@ -15,6 +15,9 @@ simple_ldp_after_store () + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cinsn 228 (set (reg/i:DI sp) + (reg/i:DI x0))) ++ (cinsn 238 (set (reg/i:DI x1) ++ (reg/i:DI x0))) ++ + (cinsn 101 (set (mem/c:DI + (plus:DI (reg/f:DI sp) + (const_int 32))[1 S4 A32])(reg:DI x0))) +@@ -24,11 +27,27 @@ simple_ldp_after_store () + (set (reg:DI x30) + (mem:DI (plus:DI (reg/f:DI sp) + (const_int 16)) [1 S4 A32]))])) +- (cinsn 11 (use (reg/i:DI sp))) +- (cinsn 12 (use (reg/i:DI cc))) +- (cinsn 13 (use (reg/i:DI x29))) +- (cinsn 14 (use (reg/i:DI x30))) +- (cinsn 15 (use (reg/i:DI x0))) ++ (cinsn 11 (use (reg/i:DI x29))) ++ (cinsn 12 (use (reg/i:DI x30))) ++ ++ /* stp x0, x2, [x1]. */ ++ (cinsn 102 (parallel [ ++ (set (mem:DI (reg/f:DI x1) [1 S4 A32]) ++ (reg:DI x0)) ++ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32]) ++ (reg:DI x2))])) ++ /* ldp x5, x6, [x1]. */ ++ (cinsn 13 (parallel [ ++ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32])) ++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 8)) [1 S4 A32])) ++ ])) ++ (cinsn 14 (use (reg/i:DI x5))) ++ (cinsn 15 (use (reg/i:DI x6))) ++ ++ (cinsn 100 (use (reg/i:DI sp))) ++ (cinsn 200 (use (reg/i:DI cc))) ++ (cinsn 300 (use (reg/i:DI x0))) + (edge-to exit (flags "FALLTHRU")) + ) ;; block 2 + ) ;; insn-chain +@@ -70,5 +89,5 @@ ldp_after_store_in_different_bb () + ) ;; function "ldp_after_store_in_different_bb" + } + +-/* Verify that the output code contains exactly 2 ldp. */ +-/* { dg-final { scan-assembler-times {ldp\t} 2 } } */ +\ No newline at end of file ++/* Verify that the output code contains exactly 3 ldp. */ ++/* { dg-final { scan-assembler-times {ldp\t} 3 } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c +index 653c30f83..59ff82df9 100644 +--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target aarch64-*-* } } */ +-/* { dg-additional-options "-fsplit-ldp-stp" } */ ++/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */ + /* + * Test is: + * Pattern where LDP insns should be split with rearrangement in order +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c +index dc9f26efb..e25762160 100644 +--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c +@@ -13,48 +13,131 @@ simple_ldp_after_store () + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ /* mov sp, x0. */ + (cinsn 228 (set (reg/i:DI sp) +- (reg/i:DI x0))) ++ (reg/i:DI x0))) ++ /* mov x1, x0. */ + (cinsn 238 (set (reg/i:DI x1) +- (reg/i:DI x0))) ++ (reg/i:DI x0))) + ++ /* str x0, [sp, 8]. */ + (cinsn 101 (set (mem/c:DI + (plus:DI (reg/f:DI sp) + (const_int 8))[1 S4 A32])(reg:DI x0))) ++ /* ldp x29, x30, [sp, 8]. */ + (cinsn 10 (parallel [ + (set (reg:DI x29) + (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32])) + (set (reg:DI x30) + (mem:DI (plus:DI (reg/f:DI sp) + (const_int 16)) [1 S4 A32]))])) ++ (cinsn 11 (use (reg/i:DI x29))) ++ (cinsn 12 (use (reg/i:DI x30))) + ++ /* str x0, [x1, -16]. */ + (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1) + (const_int -16)) [1 S4 A32]) + (reg:DI x0))) +- (cinsn 11 (parallel [ ++ /* ldp x3, x4, [x1, -16]. */ ++ (cinsn 13 (parallel [ + (set (reg:DI x3) + (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32])) + (set (reg:DI x4) + (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32])) + ])) ++ (cinsn 14 (use (reg/i:DI x3))) ++ (cinsn 15 (use (reg/i:DI x4))) + ++ /* str x0, [x1]. */ + (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32]) + (reg:DI x0))) +- (cinsn 12 (parallel [ ++ /* ldp x5, x6, [x1]. */ ++ (cinsn 16 (parallel [ + (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32])) + (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) + (const_int 8)) [1 S4 A32])) + ])) ++ (cinsn 17 (use (reg/i:DI x5))) ++ (cinsn 18 (use (reg/i:DI x6))) + +- (cinsn 13 (use (reg/i:DI sp))) +- (cinsn 14 (use (reg/i:DI cc))) +- (cinsn 15 (use (reg/i:DI x29))) +- (cinsn 16 (use (reg/i:DI x30))) +- (cinsn 17 (use (reg/i:DI x0))) +- (cinsn 18 (use (reg/i:DI x3))) +- (cinsn 19 (use (reg/i:DI x4))) +- (cinsn 20 (use (reg/i:DI x5))) +- (cinsn 21 (use (reg/i:DI x6))) ++ /* ldp x29, x30, [sp], 96. */ ++ (cinsn 19 (parallel [ ++ (set (reg/f:DI sp) ++ (plus:DI (reg/f:DI sp) (const_int 96))) ++ (set (reg:DI x29) ++ (mem:DI (reg/f:DI sp) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI sp) ++ (const_int 8)) [1 S4 A32]))])) ++ (cinsn 20 (use (reg/i:DI x29))) ++ (cinsn 21 (use (reg/i:DI x30))) ++ ++ /* stp x0, x2, [x1, 128]. */ ++ (cinsn 104 (parallel [ ++ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32]) ++ (reg:DI x0)) ++ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32]) ++ (reg:DI x2))])) ++ /* ldp x29, x30, [x1, 120]. */ ++ (cinsn 22 (parallel [ ++ (set (reg:DI x29) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 120)) [1 S4 A32])) ++ (set (reg:DI x30) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32]))])) ++ (cinsn 23 (use (reg/i:DI x29))) ++ (cinsn 24 (use (reg/i:DI x30))) ++ ++ /* stp x0, x2, [x1, 128]. */ ++ (cinsn 105 (parallel [ ++ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32]) ++ (reg:DI x0)) ++ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32]) ++ (reg:DI x2))])) ++ /* ldp x3, x4, [x1, 136]. */ ++ (cinsn 25 (parallel [ ++ (set (reg:DI x3) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32])) ++ (set (reg:DI x4) ++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 144)) [1 S4 A32])) ++ ])) ++ (cinsn 26 (use (reg/i:DI x3))) ++ (cinsn 27 (use (reg/i:DI x4))) ++ ++ /* stp w0, w2, [x1, 32]. */ ++ (cinsn 106 (parallel [ ++ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 32)) [1 S4 A32]) ++ (reg:SI x0)) ++ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 36)) [1 S4 A32]) ++ (reg:SI x2))])) ++ /* ldp x5, x6, [x1, 32]. */ ++ (cinsn 28 (parallel [ ++ (set (reg:DI x5) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 32)) [1 S4 A32])) ++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 40)) [1 S4 A32])) ++ ])) ++ (cinsn 29 (use (reg/i:DI x5))) ++ (cinsn 30 (use (reg/i:DI x6))) ++ ++ /* stp w0, w2, [x1, 40]. */ ++ (cinsn 107 (parallel [ ++ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 40)) [1 S4 A32]) ++ (reg:SI x0)) ++ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 44)) [1 S4 A32]) ++ (reg:SI x2))])) ++ /* ldp x5, x6, [x1, 32]. */ ++ (cinsn 31 (parallel [ ++ (set (reg:DI x5) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 32)) [1 S4 A32])) ++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1) ++ (const_int 40)) [1 S4 A32])) ++ ])) ++ (cinsn 32 (use (reg/i:DI x5))) ++ (cinsn 33 (use (reg/i:DI x6))) ++ ++ (cinsn 100 (use (reg/i:DI sp))) ++ (cinsn 200 (use (reg/i:DI cc))) ++ (cinsn 400 (use (reg/i:DI x0))) + (edge-to exit (flags "FALLTHRU")) + ) ;; block 2 + ) ;; insn-chain +@@ -69,43 +152,83 @@ ldp_ti_after_store () + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ /* mov sp, x0. */ + (cinsn 228 (set (reg/i:DI sp) +- (reg/i:DI x0))) ++ (reg/i:DI x0))) ++ /* mov x2, x0. */ + (cinsn 238 (set (reg/i:DI x2) +- (reg/i:DI x0))) +- ++ (reg/i:DI x0))) ++ /* str x0, [sp, 136]. */ + (cinsn 101 (set (mem/c:DI + (plus:DI (reg/f:DI sp) + (const_int 136))[1 S4 A32])(reg:DI x0))) +- (insn 81 (set (reg:TI x0 [1 S4 A32]) ++ /* ldp x0, x1, [sp, 136]. */ ++ (cinsn 81 (set (reg:TI x0 [1 S4 A32]) + (mem/c:TI (plus:DI (reg/f:DI sp) +- (const_int 136 )) [1 S4 A32])) +- (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp) +- (const_int -24 )) [1 S4 A32]) +- (nil))) +- ++ (const_int 136)) [1 S4 A32]))) ++ /* str x0, [x2, -16]. */ + (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2) +- (const_int -16)) [1 S4 A32]) ++ (const_int -16)) [1 S4 A32]) + (reg:DI x0))) +- (insn 82 (set (reg:TI x3 [1 S4 A32]) ++ /* ldp x3, x4, [x2, -16]. */ ++ (cinsn 82 (set (reg:TI x3 [1 S4 A32]) + (mem/c:TI (plus:DI (reg/f:DI x2) +- (const_int -16)) [1 S4 A32]))) +- ++ (const_int -16)) [1 S4 A32]))) ++ /* str x0, [x2]. */ + (cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32]) + (reg:DI x0))) +- (insn 83 (set (reg:TI x5 [1 S4 A32]) ++ /* ldp x5, x6, [x2]. */ ++ (cinsn 83 (set (reg:TI x5 [1 S4 A32]) + (mem/c:TI (reg/f:DI x2) [1 S4 A32]))) + ++ /* stp x0, x1, [sp, -8]. */ ++ (cinsn 104 (set (mem:TI (plus:DI (reg/v/f:DI sp) ++ (const_int -8)) [1 S4 A32]) ++ (reg:TI x0))) ++ /* ldp x5, x6, [sp], -16. */ ++ (cinsn 84 (set (reg/v:TI x5 [1 S4 A32]) ++ (mem:TI (post_dec:DI (reg/v/f:DI sp)) [1 S4 A32]))) ++ (cinsn 85 (use (reg/i:DI x5))) ++ (cinsn 86 (use (reg/i:DI x6))) ++ ++ /* stp x0, x1, [sp, 8]. */ ++ (cinsn 105 (set (mem:TI (plus:DI (reg/v/f:DI sp) ++ (const_int 8)) [1 S4 A32]) ++ (reg:TI x0))) ++ /* ldp x5, x6, [sp], -16. */ ++ (cinsn 87 (set (reg/v:TI x5 [1 S4 A32]) ++ (mem:TI (post_dec:DI (reg/v/f:DI sp)) [1 S4 A32]))) ++ (cinsn 88 (use (reg/i:DI x5))) ++ (cinsn 89 (use (reg/i:DI x6))) ++ ++ /* Intersects with insn 102. */ ++ /* ldp x2, x3, [x2, -16]!. */ ++ (cinsn 90 (set (reg/v:TI x2 [1 S4 A32]) ++ (mem:TI (pre_dec:DI (reg/v/f:DI x2)) [1 S4 A32]))) ++ (cinsn 91 (use (reg/i:DI x2))) ++ (cinsn 92 (use (reg/i:DI x3))) ++ ++ /* mov x2, x0. */ ++ (cinsn 248 (set (reg/i:DI x2) ++ (reg/i:DI x0))) ++ /* str x0, [x2, 16]. */ ++ (cinsn 106 (set (mem:DI (plus:DI (reg/v/f:DI x2) ++ (const_int 16)) [1 S4 A32]) ++ (reg:DI x0))) ++ /* ldp x3, x4, [x2, 16]!. */ ++ (cinsn 93 (set (reg/v:TI x3 [1 S4 A32]) ++ (mem:TI (pre_inc:DI (reg/v/f:DI x2)) [1 S4 A32]))) ++ (cinsn 94 (use (reg/i:DI x3))) ++ (cinsn 95 (use (reg/i:DI x4))) ++ + (cinsn 11 (use (reg/i:DI sp))) + (cinsn 12 (use (reg/i:DI cc))) + (cinsn 13 (use (reg/i:DI x29))) + (cinsn 14 (use (reg/i:DI x30))) + (cinsn 15 (use (reg/i:DI x0))) + (cinsn 16 (use (reg/i:DI x3))) +- (cinsn 17 (use (reg/i:DI x5))) + (cinsn 18 (use (reg/i:DI x1))) + (cinsn 19 (use (reg/i:DI x4))) +- (cinsn 20 (use (reg/i:DI x6))) + (edge-to exit (flags "FALLTHRU")) + ) ;; block 2 + ) ;; insn-chain +-- +2.33.0 + |