summaryrefslogtreecommitdiff
path: root/0042-Add-split-complex-instructions-pass.patch
diff options
context:
space:
mode:
Diffstat (limited to '0042-Add-split-complex-instructions-pass.patch')
-rw-r--r--0042-Add-split-complex-instructions-pass.patch1245
1 files changed, 1245 insertions, 0 deletions
diff --git a/0042-Add-split-complex-instructions-pass.patch b/0042-Add-split-complex-instructions-pass.patch
new file mode 100644
index 0000000..b73affd
--- /dev/null
+++ b/0042-Add-split-complex-instructions-pass.patch
@@ -0,0 +1,1245 @@
+From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
+From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
+Date: Mon, 21 Aug 2023 12:35:19 +0300
+Subject: [PATCH 09/18] Add split-complex-instructions pass
+
+ - Add option -fsplit-ldp-stp
+ - Add functionality to detect and split LDP instructions that depend on a store.
+ - Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
+ - Add RTL tests
+
+Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
+Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
+Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
+---
+ gcc/common.opt | 5 +
+ gcc/config/aarch64/aarch64.cc | 42 ++
+ gcc/doc/tm.texi | 8 +
+ gcc/doc/tm.texi.in | 4 +
+ gcc/params.opt | 3 +
+ gcc/passes.def | 1 +
+ gcc/sched-rgn.cc | 704 +++++++++++++++++-
+ gcc/target.def | 10 +
+ .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 74 ++
+ .../rtl/aarch64/test-ldp-split-rearrange.c | 40 +
+ .../gcc.dg/rtl/aarch64/test-ldp-split.c | 174 +++++
+ gcc/timevar.def | 1 +
+ gcc/tree-pass.h | 1 +
+ 13 files changed, 1066 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index a42bee250..c0e3f5687 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1797,6 +1797,11 @@ floop-nest-optimize
+ Common Var(flag_loop_nest_optimize) Optimization
+ Enable the loop nest optimizer.
+
++fsplit-ldp-stp
++Common Var(flag_split_ldp_stp) Optimization
++Split load/store pair instructions into separate load/store operations
++for better performance.
++
+ fstrict-volatile-bitfields
+ Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
+ Force bitfield accesses to match their type width.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 04072ca25..48e2eded0 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
+
+ #endif /* #if CHECKING_P */
+
++/* TODO: stop using ranges; use the full list of instruction codes instead. */
++
++bool
++is_aarch64_ldp_insn (int icode)
++{
++ if ((icode >= CODE_FOR_load_pair_sw_sisi
++ && icode <= CODE_FOR_load_pair_dw_tftf)
++ || (icode >= CODE_FOR_loadwb_pairsi_si
++ && icode <= CODE_FOR_loadwb_pairtf_di)
++ || (icode >= CODE_FOR_load_pairv8qiv8qi
++ && icode <= CODE_FOR_load_pairdfdf)
++ || (icode >= CODE_FOR_load_pairv16qiv16qi
++ && icode <= CODE_FOR_load_pairv8bfv2df)
++ || (icode >= CODE_FOR_load_pair_lanesv8qi
++ && icode <= CODE_FOR_load_pair_lanesdf))
++ return true;
++ return false;
++}
++
++bool
++is_aarch64_stp_insn (int icode)
++{
++ if ((icode >= CODE_FOR_store_pair_sw_sisi
++ && icode <= CODE_FOR_store_pair_dw_tftf)
++ || (icode >= CODE_FOR_storewb_pairsi_si
++ && icode <= CODE_FOR_storewb_pairtf_di)
++ || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
++ && icode <= CODE_FOR_vec_store_pairdfdf)
++ || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
++ && icode <= CODE_FOR_vec_store_pairv8bfv2df)
++ || (icode >= CODE_FOR_store_pair_lanesv8qi
++ && icode <= CODE_FOR_store_pair_lanesdf))
++ return true;
++ return false;
++}
++
++#undef TARGET_IS_LDP_INSN
++#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
++
++#undef TARGET_IS_STP_INSN
++#define TARGET_IS_STP_INSN is_aarch64_stp_insn
++
+ #undef TARGET_STACK_PROTECT_GUARD
+ #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index c5006afc0..0c6415a9c 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
+ lists.
+ @end defmac
+
++@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
++Return true if icode is corresponding to any of the LDP instruction types.
++@end deftypefn
++
++@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
++Return true if icode is corresponding to any of the STP instruction types.
++@end deftypefn
++
+ @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
+ This target hook returns @code{true} past the point in which new jump
+ instructions could be created. On machines that require a register for
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index f869ddd5e..6ff60e562 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
+ lists.
+ @end defmac
+
++@hook TARGET_IS_LDP_INSN
++
++@hook TARGET_IS_STP_INSN
++
+ @hook TARGET_CANNOT_MODIFY_JUMPS_P
+
+ @hook TARGET_HAVE_CONDITIONAL_EXECUTION
+diff --git a/gcc/params.opt b/gcc/params.opt
+index 7fcc2398d..6176d4790 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
+ Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
+ Target size of compressed pointer, which should be 8, 16 or 32.
+
++-param=param-ldp-dependency-search-range=
++Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
++Range for dependent ldp search in split-ldp-stp pass.
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 941bbadf0..a30e05688 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3. If not see
+ NEXT_PASS (pass_reorder_blocks);
+ NEXT_PASS (pass_leaf_regs);
+ NEXT_PASS (pass_split_before_sched2);
++ NEXT_PASS (pass_split_complex_instructions);
+ NEXT_PASS (pass_sched2);
+ NEXT_PASS (pass_stack_regs);
+ PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
+diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
+index a0dfdb788..b4df8bdc5 100644
+--- a/gcc/sched-rgn.cc
++++ b/gcc/sched-rgn.cc
+@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3. If not see
+ are actually scheduled. */
+
+ #include "config.h"
++#define INCLUDE_SET
++#define INCLUDE_VECTOR
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "dbgcnt.h"
+ #include "pretty-print.h"
+ #include "print-rtl.h"
++#include "cfgrtl.h"
+
+ /* Disable warnings about quoting issues in the pp_xxx calls below
+ that (intentionally) don't follow GCC diagnostic conventions. */
+@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
+ return new pass_sched_fusion (ctxt);
+ }
+
++namespace {
++
++/* Def-use analysis special functions implementation. */
++
++static struct df_link *
++get_defs (rtx_insn *insn, rtx reg)
++{
++ df_ref use;
++ struct df_link *ref_chain, *ref_link;
++
++ FOR_EACH_INSN_USE (use, insn)
++ {
++ if (GET_CODE (DF_REF_REG (use)) == SUBREG)
++ return NULL;
++ if (REGNO (DF_REF_REG (use)) == REGNO (reg))
++ break;
++ }
++
++ gcc_assert (use != NULL);
++
++ ref_chain = DF_REF_CHAIN (use);
++
++ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
++ {
++ /* Problem getting some definition for this instruction. */
++ if (ref_link->ref == NULL)
++ return NULL;
++ if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
++ return NULL;
++ /* As global regs are assumed to be defined at each function call
++ dataflow can report a call_insn as being a definition of REG.
++ But we can't do anything with that in this pass so proceed only
++ if the instruction really sets REG in a way that can be deduced
++ from the RTL structure. */
++ if (global_regs[REGNO (reg)]
++ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
++ return NULL;
++ }
++
++ return ref_chain;
++}
++
++static struct df_link *
++get_uses (rtx_insn *insn, rtx reg)
++{
++ df_ref def;
++ struct df_link *ref_chain, *ref_link;
++
++ FOR_EACH_INSN_DEF (def, insn)
++ if (REGNO (DF_REF_REG (def)) == REGNO (reg))
++ break;
++
++ gcc_assert (def != NULL && "Broken def-use analisys chain.");
++
++ ref_chain = DF_REF_CHAIN (def);
++
++ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
++ {
++ /* Problem getting some use for this instruction. */
++ if (ref_link->ref == NULL)
++ return NULL;
++ }
++
++ return ref_chain;
++}
++
++const pass_data pass_data_split_complex_instructions = {
++ RTL_PASS, /* Type. */
++ "split_complex_instructions", /* Name. */
++ OPTGROUP_NONE, /* Optinfo_flags. */
++ TV_SPLIT_CMP_INS, /* Tv_id. */
++ 0, /* Properties_required. */
++ 0, /* Properties_provided. */
++ 0, /* Properties_destroyed. */
++ 0, /* Todo_flags_start. */
++ (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish. */
++};
++
++class pass_split_complex_instructions : public rtl_opt_pass
++{
++private:
++ enum complex_instructions_t
++ {
++ UNDEFINED,
++ LDP,
++ LDP_TI,
++ STP,
++ STR
++ };
++
++ void split_complex_insn (rtx_insn *insn);
++ void split_ldp_ti (rtx_insn *insn);
++ void split_ldp_with_offset (rtx_insn *ldp_insn);
++ void split_simple_ldp (rtx_insn *ldp_insn);
++ void split_ldp_stp (rtx_insn *insn);
++ complex_instructions_t get_insn_type (rtx_insn *insn);
++
++ basic_block bb;
++ rtx_insn *insn;
++ std::set<rtx_insn *> dependent_stores_candidates;
++ std::set<rtx_insn *> ldp_to_split_list;
++
++ complex_instructions_t complex_insn_type = UNDEFINED;
++ bool is_store_insn (rtx_insn *insn);
++ bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb);
++ bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb,
++ rtx_insn *search_insn,
++ int search_range
++ = param_ldp_dependency_search_range);
++ bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn);
++ void init_df ();
++ void find_dependent_stores_candidates (rtx_insn *ldp_insn);
++ int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type,
++ int *arith_operation_ptr = NULL);
++
++public:
++ pass_split_complex_instructions (gcc::context *ctxt)
++ : rtl_opt_pass (pass_data_split_complex_instructions, ctxt)
++ {
++ }
++ /* opt_pass methods: */
++ virtual bool gate (function *);
++
++ virtual unsigned int
++ execute (function *)
++ {
++ enum rtx_code ldp_memref_code;
++ init_df ();
++ ldp_to_split_list.clear ();
++ FOR_EACH_BB_FN (bb, cfun)
++ {
++ FOR_BB_INSNS (bb, insn)
++ {
++ complex_instructions_t insn_type = get_insn_type (insn);
++ /* TODO: Add splitting of STP instructions. */
++ if (insn_type != LDP && insn_type != LDP_TI)
++ continue;
++ /* TODO: Currently support only ldp_ti and ldp with REG or
++ PLUS/MINUS offset expression. */
++ if (insn_type == LDP_TI)
++ {
++ ldp_memref_code = GET_CODE (XEXP (XEXP (PATTERN (insn), 1),
++ 0));
++ if (ldp_memref_code != REG && ldp_memref_code != PLUS
++ && ldp_memref_code != MINUS)
++ continue;
++ }
++ if (is_ldp_dependent_on_store (insn, bb))
++ {
++ ldp_to_split_list.insert (insn);
++ }
++ }
++ }
++
++ for (std::set<rtx_insn *>::iterator i = ldp_to_split_list.begin ();
++ i != ldp_to_split_list.end (); ++i)
++ split_complex_insn (*i);
++
++ return 0;
++ }
++}; // class pass_split_complex_instructions
++
++bool
++pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn,
++ basic_block bb)
++{
++ find_dependent_stores_candidates (ldp_insn);
++ return bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn);
++}
++
++bool
++pass_split_complex_instructions::bfs_for_reg_dependent_store (
++ rtx_insn *ldp_insn, basic_block search_bb, rtx_insn *search_insn,
++ int search_range)
++{
++ rtx_insn *current_search_insn = search_insn;
++
++ for (int i = search_range; i > 0; --i)
++ {
++ if (!current_search_insn)
++ return false;
++ bool checking_result
++ = is_store_reg_dependent (ldp_insn, current_search_insn);
++ if (checking_result)
++ {
++ if (dump_file)
++ {
++ fprintf (dump_file, "LDP to split:\n");
++ print_rtl_single (dump_file, ldp_insn);
++ fprintf (dump_file, "Found STR:\n");
++ print_rtl_single (dump_file, current_search_insn);
++ }
++ return true;
++ }
++ if (current_search_insn == BB_HEAD (search_bb))
++ {
++ /* Search in all parent BBs for the reg_dependent store. */
++ edge_iterator ei;
++ edge e;
++
++ FOR_EACH_EDGE (e, ei, search_bb->preds)
++ if (e->src->index != 0
++ && bfs_for_reg_dependent_store (ldp_insn, e->src,
++ BB_END (e->src), i - 1))
++ return true;
++ return false;
++ }
++ else
++ {
++ if (!active_insn_p (current_search_insn))
++ i++;
++ current_search_insn = PREV_INSN (current_search_insn);
++ }
++ }
++ return false;
++}
++
++void
++pass_split_complex_instructions::init_df ()
++{
++ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
++ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
++ df_mir_add_problem ();
++ df_live_add_problem ();
++ df_live_set_all_dirty ();
++ df_analyze ();
++ df_set_flags (DF_DEFER_INSN_RESCAN);
++}
++
++void
++pass_split_complex_instructions::find_dependent_stores_candidates (
++ rtx_insn *ldp_insn)
++{
++ dependent_stores_candidates.clear ();
++ df_ref use;
++
++ FOR_EACH_INSN_USE (use, ldp_insn)
++ {
++ df_link *defs = get_defs (ldp_insn, DF_REF_REG (use));
++ if (!defs)
++ return;
++
++ for (df_link *def = defs; def; def = def->next)
++ {
++ df_link *uses
++ = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref));
++ if (!uses)
++ continue;
++
++ for (df_link *use = uses; use; use = use->next)
++ {
++ if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR
++ && is_store_insn (DF_REF_INSN (use->ref)))
++ dependent_stores_candidates.insert (DF_REF_INSN (use->ref));
++ }
++ }
++ }
++}
++
++bool
++pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn,
++ rtx_insn *str_insn)
++{
++ if (!is_store_insn (str_insn)
++ || dependent_stores_candidates.find (str_insn)
++ == dependent_stores_candidates.end ())
++ return false;
++
++ int ldp_offset_sign = UNDEFINED;
++ int ldp_offset
++ = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_offset_sign);
++ if (ldp_offset_sign == MINUS)
++ ldp_offset = -ldp_offset;
++
++ int str_offset_sign = UNDEFINED;
++ int str_offset = get_insn_offset (str_insn, STR, &str_offset_sign);
++ if (str_offset_sign == MINUS)
++ str_offset = -str_offset;
++
++ if (str_offset == ldp_offset || str_offset == ldp_offset + 8)
++ return true;
++
++ return false;
++}
++
++bool
++pass_split_complex_instructions::is_store_insn (rtx_insn *insn)
++{
++ if (!insn)
++ return false;
++ rtx sset_b = single_set (insn);
++ /* TODO: The condition below accepts only store instructions in which
++ the memory location's operand is either a register (base) or a plus/minus
++ operation (base + #imm). So it might make sense to add support for other
++ cases (e.g. multiply and shift). */
++ if (sset_b && MEM_P (SET_DEST (sset_b))
++ && GET_MODE (XEXP (sset_b, 0)) != BLKmode
++ && (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == REG
++ || (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == PLUS
++ || GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == MINUS)
++ && (GET_CODE (XEXP (XEXP (XEXP (sset_b, 0), 0), 1)) == CONST_INT)))
++ return true;
++
++ return false;
++}
++
++int
++pass_split_complex_instructions::get_insn_offset (
++ rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr)
++{
++ rtx insn_pat = PATTERN (insn);
++ int returned_offset = 0;
++
++ rtx offset_expr = NULL;
++ rtx offset_value_expr = NULL;
++
++ switch (insn_type)
++ {
++ case LDP:
++ {
++ int number_of_sub_insns = XVECLEN (insn_pat, 0);
++
++ /* Calculate the offset of the first load insn itself. */
++ rtx_insn *first_load_insn = NULL;
++ if (number_of_sub_insns == 2)
++ {
++ first_load_insn
++ = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
++ arith_operation_ptr = NULL;
++
++ offset_expr = XEXP (XEXP (PATTERN (first_load_insn), 1), 0);
++ if (GET_CODE (offset_expr) == PLUS
++ || GET_CODE (offset_expr) == MINUS)
++ offset_value_expr
++ = XEXP (XEXP (XEXP (PATTERN (first_load_insn), 1), 0), 1);
++ else
++ offset_expr = NULL;
++ }
++ else if (number_of_sub_insns == 3)
++ {
++ rtx_insn *offset_sub_insn
++ = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
++
++ offset_expr = XEXP (PATTERN (offset_sub_insn), 1);
++ offset_value_expr = XEXP (XEXP (PATTERN (offset_sub_insn), 1), 1);
++ }
++ else
++ {
++ gcc_assert (false
++ && "Wrong number of elements in the ldp_insn vector");
++ }
++ break;
++ }
++ case LDP_TI:
++ {
++ offset_expr = XEXP (XEXP (insn_pat, 1), 0);
++ if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS)
++ return 0;
++ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 1), 0), 1);
++ break;
++ }
++ case STR:
++ {
++ offset_expr = XEXP (XEXP (insn_pat, 0), 0);
++ /* If memory location is specified by single base register then the
++ offset is zero. */
++ if (GET_CODE (offset_expr) == REG)
++ return 0;
++ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 0), 0), 1);
++ break;
++ }
++ default:
++ {
++ if (dumps_are_enabled && dump_file)
++ {
++ fprintf (dump_file, "Instruction that was tried to split:\n");
++ print_rtl_single (dump_file, insn);
++ }
++ gcc_assert (false && "Unsupported instruction type");
++ break;
++ }
++ }
++
++ if (offset_expr != NULL && offset_value_expr
++ && GET_CODE (offset_value_expr) == CONST_INT)
++ returned_offset = XINT (offset_value_expr, 0);
++
++ if (arith_operation_ptr != NULL)
++ {
++ *arith_operation_ptr = GET_CODE (offset_expr);
++ gcc_assert ((*arith_operation_ptr == MINUS
++ || *arith_operation_ptr == PLUS)
++ && "Unexpected arithmetic operation in the offset expr");
++ }
++
++ return returned_offset;
++}
++
++void
++pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn)
++{
++ rtx pat = PATTERN (ldp_insn);
++
++ rtx_insn *mem_insn_1 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 0)));
++ rtx_insn *mem_insn_2 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 1)));
++
++ int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1)));
++ int src_regno;
++
++ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0);
++
++ if (GET_CODE (srs_reg_insn) == REG)
++ src_regno = REGNO (srs_reg_insn);
++ else
++ src_regno = REGNO (XEXP (srs_reg_insn, 0));
++
++ rtx_insn *emited_insn_1, *emited_insn_2;
++
++ /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
++ if (src_regno == dest_regno)
++ std::swap (mem_insn_1, mem_insn_2);
++
++ emited_insn_1 = emit_insn (PATTERN (mem_insn_1));
++ emited_insn_2 = emit_insn (PATTERN (mem_insn_2));
++
++ int sub_insn_1_code = recog (PATTERN (mem_insn_1), mem_insn_1, 0);
++ int sub_insn_2_code = recog (PATTERN (mem_insn_2), mem_insn_2, 0);
++
++ INSN_CODE (emited_insn_1) = sub_insn_1_code;
++ INSN_CODE (emited_insn_2) = sub_insn_2_code;
++}
++
++void
++pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn)
++{
++ rtx pat = PATTERN (ldp_insn);
++ bool post_index = true;
++
++ rtx_insn offset_insn;
++ rtx_insn mem_insn_1;
++ rtx_insn mem_insn_2;
++
++ int offset_insn_code;
++ int mem_insn_1_code = -1;
++ int mem_insn_2_code = -1;
++
++ int offset = 0;
++ int arith_operation = UNDEFINED;
++
++ for (int i = 0; i < 3; i++)
++ {
++ rtx sub_insn = XVECEXP (pat, 0, i);
++ rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn));
++ int sub_insn_code
++ = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0);
++
++ /* If sub_insn is offset related. */
++ if (GET_RTX_CLASS (sub_insn_code) == RTX_UNARY)
++ {
++ offset_insn = *copy_of_sub_insn;
++ offset_insn_code = sub_insn_code;
++ gcc_assert (i == 0
++ && "Offset related insn must be the first "
++ "element of a parallel insn vector");
++
++ offset = get_insn_offset (ldp_insn, LDP, &arith_operation);
++ }
++ else
++ {
++ if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG)
++ {
++ rtx &offset_expr
++ = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1);
++ if (GET_CODE (offset_expr) == CONST_INT)
++ {
++ int local_offset = XINT (offset_expr, 0);
++ offset = (arith_operation == PLUS ? offset : -offset);
++
++ offset_expr = GEN_INT (local_offset + offset);
++
++ gcc_assert (
++ (arith_operation == MINUS || arith_operation == PLUS)
++ && "Unexpected arithmetic operation in offset related "
++ "sub_insn");
++
++ if (i == 1)
++ post_index = false;
++ }
++ else
++ {
++ post_index = true;
++ }
++ }
++ }
++ if (i == 1)
++ {
++ mem_insn_1 = *copy_of_sub_insn;
++ mem_insn_1_code = sub_insn_code;
++ }
++ if (i == 2)
++ {
++ mem_insn_2 = *copy_of_sub_insn;
++ mem_insn_2_code = sub_insn_code;
++ }
++ }
++ gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1
++ && "Uninitialized memory insns");
++
++ int dest_regno = REGNO (SET_DEST (PATTERN (&mem_insn_1)));
++ int src_regno;
++
++ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (&mem_insn_1)), 0);
++
++ if (GET_CODE (srs_reg_insn) == REG)
++ src_regno = REGNO (srs_reg_insn);
++ else
++ src_regno = REGNO (XEXP (srs_reg_insn, 0));
++
++ /* Don't split such weird LDP. */
++ if (src_regno == dest_regno)
++ return;
++
++ rtx_insn *emited_offset_insn;
++ if (!post_index)
++ {
++ emited_offset_insn = emit_insn (PATTERN (&offset_insn));
++ INSN_CODE (emited_offset_insn) = offset_insn_code;
++ }
++
++ rtx_insn *emited_insn_1 = emit_insn (PATTERN (&mem_insn_1));
++ rtx_insn *emited_insn_2 = emit_insn (PATTERN (&mem_insn_2));
++
++
++ INSN_CODE (emited_insn_1) = mem_insn_1_code;
++ INSN_CODE (emited_insn_2) = mem_insn_2_code;
++
++ if (post_index)
++ {
++ emited_offset_insn = emit_insn (PATTERN (&offset_insn));
++ INSN_CODE (emited_offset_insn) = offset_insn_code;
++ }
++}
++
++void
++pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn)
++{
++ rtx_insn *prev_insn = PREV_INSN (insn);
++ int number_of_sub_insns = XVECLEN (PATTERN (insn), 0);
++
++ start_sequence ();
++
++ if (number_of_sub_insns == 2)
++ split_simple_ldp (insn);
++ else if (number_of_sub_insns == 3)
++ split_ldp_with_offset (insn);
++ else
++ gcc_assert (false && "Broken complex insn vector");
++
++ rtx_insn *seq = get_insns ();
++ unshare_all_rtl_in_chain (seq);
++ end_sequence ();
++
++ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
++ delete_insn_and_edges (insn);
++}
++
++void
++pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn)
++{
++ rtx_insn *prev_insn = PREV_INSN (insn);
++ rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn)));
++ rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn)));
++
++ rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0);
++ rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1);
++ rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1);
++
++ PUT_MODE (mem_insn_1, DImode);
++ PUT_MODE (mem_insn_2, DImode);
++
++ int reg_no_1 = REGNO (reg_insn_1);
++
++ XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1);
++ XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1);
++
++ rtx load_insn_2_plus_expr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0);
++ if (GET_CODE (load_insn_2_plus_expr) == REG)
++ {
++ XEXP (XEXP (PATTERN (load_insn_2), 1), 0)
++ = gen_rtx_PLUS (DImode,
++ gen_rtx_REG (DImode, REGNO (load_insn_2_plus_expr)),
++ GEN_INT (GET_MODE_SIZE (DImode)));
++ }
++ else
++ {
++ rtx load_insn_2_offset_expr
++ = XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1);
++
++ if (load_insn_2_offset_expr == NULL)
++ return;
++
++ if (GET_CODE (load_insn_2_offset_expr) == CONST_INT)
++ {
++ int load_insn_2_offset = XINT (load_insn_2_offset_expr, 0);
++ XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1)
++ = GEN_INT (load_insn_2_offset + GET_MODE_SIZE (DImode));
++ }
++ }
++
++ start_sequence ();
++
++ int src_regno;
++ rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0);
++
++ if (GET_CODE (srs_reg_insn) == REG)
++ src_regno = REGNO (srs_reg_insn);
++ else
++ src_regno = REGNO (XEXP (srs_reg_insn, 0));
++
++ /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
++ if (src_regno == reg_no_1)
++ std::swap (load_insn_1, load_insn_2);
++
++ rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1));
++ rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2));
++
++ INSN_CODE (emited_load_insn_1)
++ = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0);
++ INSN_CODE (emited_load_insn_2)
++ = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0);
++
++ rtx_insn *seq = get_insns ();
++ unshare_all_rtl_in_chain (seq);
++ end_sequence ();
++
++ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
++ delete_insn_and_edges (insn);
++}
++
++void
++pass_split_complex_instructions::split_complex_insn (rtx_insn *insn)
++{
++ complex_instructions_t insn_type = get_insn_type (insn);
++ /* TODO: Add splitting of STP instructions. */
++ if (insn_type == LDP || insn_type == STP)
++ split_ldp_stp (insn);
++ else if (insn_type == LDP_TI)
++ split_ldp_ti (insn);
++ else
++ gcc_assert (false && "Unsupported type of insn to split");
++}
++
++pass_split_complex_instructions::complex_instructions_t
++pass_split_complex_instructions::get_insn_type (rtx_insn *insn)
++{
++ if (!INSN_P (insn))
++ return UNDEFINED;
++
++ rtx pat = PATTERN (insn);
++ int icode = recog (PATTERN (insn), insn, NULL);
++
++ if (GET_CODE (pat) == PARALLEL)
++ {
++ if (targetm.is_ldp_insn (icode))
++ {
++ return LDP;
++ }
++ if (targetm.is_stp_insn (icode))
++ {
++ return STP;
++ }
++ else
++ {
++ return UNDEFINED;
++ }
++ }
++ rtx set_insn = single_set (insn);
++ if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM
++ && GET_MODE (XEXP (set_insn, 1)) == E_TImode)
++ return LDP_TI;
++
++ return UNDEFINED;
++}
++
++bool
++pass_split_complex_instructions::gate (function *)
++{
++ return targetm.is_ldp_insn && targetm.is_stp_insn && optimize > 0
++ && flag_split_ldp_stp > 0;
++}
++
++} // anon namespace
++
++rtl_opt_pass *
++make_pass_split_complex_instructions (gcc::context *ctxt)
++{
++ return new pass_split_complex_instructions (ctxt);
++}
++
+ #if __GNUC__ >= 10
+ # pragma GCC diagnostic pop
+-#endif
++#endif
+\ No newline at end of file
+diff --git a/gcc/target.def b/gcc/target.def
+index d85adf36a..a3a50b474 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -2677,6 +2677,16 @@ modes and they have different conditional execution capability, such as ARM.",
+ bool, (void),
+ default_have_conditional_execution)
+
++DEFHOOK
++(is_ldp_insn,
++ "Return true if icode is corresponding to any of the LDP instruction types.",
++ bool, (int icode), NULL)
++
++DEFHOOK
++(is_stp_insn,
++ "Return true if icode is corresponding to any of the STP instruction types.",
++ bool, (int icode), NULL)
++
+ DEFHOOK
+ (gen_ccmp_first,
+ "This function prepares to emit a comparison insn for the first compare in a\n\
+diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
+new file mode 100644
+index 000000000..3918d43f6
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
+@@ -0,0 +1,74 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-additional-options "-fsplit-ldp-stp" } */
++/*
++ * Tests are:
++ * Patterns where LDP insns should NOT be split
++ * */
++
++int __RTL (startwith ("split_complex_instructions"))
++simple_ldp_after_store ()
++{
++(function "simple_ldp_after_store"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI sp)
++ (reg/i:DI x0)))
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI sp)
++ (const_int 32))[1 S4 A32])(reg:DI x0)))
++ (cinsn 10 (parallel [
++ (set (reg:DI x29)
++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
++ (set (reg:DI x30)
++ (mem:DI (plus:DI (reg/f:DI sp)
++ (const_int 16)) [1 S4 A32]))]))
++ (cinsn 11 (use (reg/i:DI sp)))
++ (cinsn 12 (use (reg/i:DI cc)))
++ (cinsn 13 (use (reg/i:DI x29)))
++ (cinsn 14 (use (reg/i:DI x30)))
++ (cinsn 15 (use (reg/i:DI x0)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 2
++ ) ;; insn-chain
++) ;; function "simple_ldp_after_store"
++}
++
++int __RTL (startwith ("split_complex_instructions"))
++ldp_after_store_in_different_bb ()
++{
++(function "ldp_after_store_in_different_bb"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI sp)
++ (reg/i:DI x0)))
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI sp)
++ (const_int 32))[1 S4 A32])(reg:DI x0)))
++ (edge-to 3 (flags "FALLTHRU"))
++ ) ;; block 2
++ (block 3
++ (edge-from 2 (flags "FALLTHRU"))
++ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 10 (parallel [
++ (set (reg:DI x29)
++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
++ (set (reg:DI x30)
++ (mem:DI (plus:DI (reg/f:DI sp)
++ (const_int 16)) [1 S4 A32]))]))
++ (cinsn 11 (use (reg/i:DI sp)))
++ (cinsn 12 (use (reg/i:DI cc)))
++ (cinsn 13 (use (reg/i:DI x29)))
++ (cinsn 14 (use (reg/i:DI x30)))
++ (cinsn 15 (use (reg/i:DI x0)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 3
++ ) ;; insn-chain
++) ;; function "ldp_after_store_in_different_bb"
++}
++
++/* Verify that the output code contains exactly 2 ldp. */
++/* { dg-final { scan-assembler-times {ldp\t} 2 } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
+new file mode 100644
+index 000000000..653c30f83
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
+@@ -0,0 +1,40 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-additional-options "-fsplit-ldp-stp" } */
++/*
++ * Test is:
++ * Pattern where LDP insns should be split with rearrangement in order
++ * to deal with the data dependency between subinstructions.
++ * */
++
++int __RTL (startwith ("split_complex_instructions"))
++simple_ldp_after_store ()
++{
++(function "ldp_equal_registers"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI x1)
++ (reg/i:DI x0)))
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI x1)
++ (const_int 8))[1 S4 A32])(reg:DI x0)))
++ (cinsn 10 (parallel [
++ (set (reg:DI x1)
++ (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32]))
++ (set (reg:DI x2)
++ (mem:DI (plus:DI (reg/f:DI x1)
++ (const_int 16)) [1 S4 A32]))]))
++ (cinsn 11 (use (reg/i:DI sp)))
++ (cinsn 12 (use (reg/i:DI cc)))
++ (cinsn 13 (use (reg/i:DI x0)))
++ (cinsn 14 (use (reg/i:DI x1)))
++ (cinsn 15 (use (reg/i:DI x2)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 2
++ ) ;; insn-chain
++) ;; function "ldp_equal_registers"
++}
++
++/* Verify that the output code rearranges the ldrs. */
++/* { dg-final { scan-assembler-times ".*ldr.*x2.*x1,.*16.*ldr.*x1.*x1.*8" 1 } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
+new file mode 100644
+index 000000000..dc9f26efb
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
+@@ -0,0 +1,174 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
++/*
++ * Tests are:
++ * Patterns where LDP insns should be split
++ * */
++/* Three paired loads, each right after a store to its first address.  */
++int __RTL (startwith ("split_complex_instructions"))
++simple_ldp_after_store ()
++{
++(function "simple_ldp_after_store"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI sp)
++ (reg/i:DI x0)))
++ (cinsn 238 (set (reg/i:DI x1)
++ (reg/i:DI x0)))
++
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI sp)
++ (const_int 8))[1 S4 A32])(reg:DI x0)))
++ (cinsn 10 (parallel [
++ (set (reg:DI x29)
++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
++ (set (reg:DI x30)
++ (mem:DI (plus:DI (reg/f:DI sp)
++ (const_int 16)) [1 S4 A32]))]))
++
++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
++ (const_int -16)) [1 S4 A32])
++ (reg:DI x0)))
++ (cinsn 11 (parallel [
++ (set (reg:DI x3)
++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
++ (set (reg:DI x4)
++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
++ ]))
++
++ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
++ (reg:DI x0)))
++ (cinsn 12 (parallel [
++ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
++ (const_int 8)) [1 S4 A32]))
++ ]))
++
++ (cinsn 13 (use (reg/i:DI sp)))
++ (cinsn 14 (use (reg/i:DI cc)))
++ (cinsn 15 (use (reg/i:DI x29)))
++ (cinsn 16 (use (reg/i:DI x30)))
++ (cinsn 17 (use (reg/i:DI x0)))
++ (cinsn 18 (use (reg/i:DI x3)))
++ (cinsn 19 (use (reg/i:DI x4)))
++ (cinsn 20 (use (reg/i:DI x5)))
++ (cinsn 21 (use (reg/i:DI x6)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 2
++ ) ;; insn-chain
++) ;; function "simple_ldp_after_store"
++}
++/* Each TI-mode load follows a DI store to the address it reads from.  */
++int __RTL (startwith ("split_complex_instructions"))
++ldp_ti_after_store ()
++{
++ (function "ldp_ti_after_store"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI sp)
++ (reg/i:DI x0)))
++ (cinsn 238 (set (reg/i:DI x2)
++ (reg/i:DI x0)))
++
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI sp)
++ (const_int 136))[1 S4 A32])(reg:DI x0)))
++ (insn 81 (set (reg:TI x0 [1 S4 A32])
++ (mem/c:TI (plus:DI (reg/f:DI sp)
++ (const_int 136 )) [1 S4 A32]))
++ (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp)
++ (const_int -24 )) [1 S4 A32])
++ (nil)))
++
++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2)
++ (const_int -16)) [1 S4 A32])
++ (reg:DI x0)))
++ (insn 82 (set (reg:TI x3 [1 S4 A32])
++ (mem/c:TI (plus:DI (reg/f:DI x2)
++ (const_int -16)) [1 S4 A32])))
++
++ (cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32])
++ (reg:DI x0)))
++ (insn 83 (set (reg:TI x5 [1 S4 A32])
++ (mem/c:TI (reg/f:DI x2) [1 S4 A32])))
++
++ (cinsn 11 (use (reg/i:DI sp)))
++ (cinsn 12 (use (reg/i:DI cc)))
++ (cinsn 13 (use (reg/i:DI x29)))
++ (cinsn 14 (use (reg/i:DI x30)))
++ (cinsn 15 (use (reg/i:DI x0)))
++ (cinsn 16 (use (reg/i:DI x3)))
++ (cinsn 17 (use (reg/i:DI x5)))
++ (cinsn 18 (use (reg/i:DI x1)))
++ (cinsn 19 (use (reg/i:DI x4)))
++ (cinsn 20 (use (reg/i:DI x6)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 2
++ ) ;; insn-chain
++) ;; function "ldp_ti_after_store"
++}
++/* Stores are in block 2; paired loads of those addresses are in block 3.  */
++int __RTL (startwith ("split_complex_instructions"))
++ldp_after_store_in_different_bb ()
++{
++(function "ldp_after_store_in_different_bb"
++ (insn-chain
++ (block 2
++ (edge-from entry (flags "FALLTHRU"))
++ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 228 (set (reg/i:DI sp)
++ (reg/i:DI x0)))
++ (cinsn 238 (set (reg/i:DI x1)
++ (reg/i:DI x0)))
++
++ (cinsn 101 (set (mem/c:DI
++ (plus:DI (reg/f:DI sp)
++ (const_int 8))[1 S4 A32])(reg:DI x0)))
++ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
++ (const_int -16)) [1 S4 A32])
++ (reg:DI x0)))
++ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
++ (reg:DI x0)))
++ (edge-to 3 (flags "FALLTHRU"))
++ ) ;; block 2
++ (block 3
++ (edge-from 2 (flags "FALLTHRU"))
++ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
++ (cinsn 10 (parallel [
++ (set (reg:DI x29)
++ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
++ (set (reg:DI x30)
++ (mem:DI (plus:DI (reg/f:DI sp)
++ (const_int 16)) [1 S4 A32]))]))
++ (cinsn 11 (parallel [
++ (set (reg:DI x3)
++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
++ (set (reg:DI x4)
++ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
++ ]))
++ (cinsn 12 (parallel [
++ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
++ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
++ (const_int 8)) [1 S4 A32]))
++ ]))
++ (cinsn 13 (use (reg/i:DI sp)))
++ (cinsn 14 (use (reg/i:DI cc)))
++ (cinsn 15 (use (reg/i:DI x29)))
++ (cinsn 16 (use (reg/i:DI x30)))
++ (cinsn 17 (use (reg/i:DI x0)))
++ (cinsn 18 (use (reg/i:DI x3)))
++ (cinsn 19 (use (reg/i:DI x4)))
++ (cinsn 20 (use (reg/i:DI x5)))
++ (cinsn 21 (use (reg/i:DI x6)))
++ (edge-to exit (flags "FALLTHRU"))
++ ) ;; block 3
++ ) ;; insn-chain
++) ;; function "ldp_after_store_in_different_bb"
++}
++
++/* Verify that the output code doesn't contain ldp. */
++/* { dg-final { scan-assembler-not {ldp\t} } } */
+\ No newline at end of file
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index 1e7d4e74b..2ccecffb5 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -280,6 +280,7 @@ DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
+ DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
+ DEFTIMEVAR (TV_REE , "ree")
+ DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
++DEFTIMEVAR (TV_SPLIT_CMP_INS , "split complex instructions")
+ DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
+ DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
+ DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index 86f38e2f2..6daac7fc1 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -612,6 +612,7 @@ extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
+ extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
+ *ctxt);
+ extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
++extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
+ extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
+ extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
+ extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
+--
+2.33.0
+