summaryrefslogtreecommitdiff
path: root/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
diff options
context:
space:
mode:
Diffstat (limited to '0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch')
-rw-r--r--0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch1007
1 files changed, 1007 insertions, 0 deletions
diff --git a/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
new file mode 100644
index 0000000..baeff90
--- /dev/null
+++ b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
@@ -0,0 +1,1007 @@
+From d334ec1579fb0668da5e23ced3b782d7f6f35d77 Mon Sep 17 00:00:00 2001
+From: benniaobufeijiushiji <linda7@huawei.com>
+Date: Mon, 17 Oct 2022 17:21:57 +0800
+Subject: [PATCH 30/35] [Loop-distribution] Add isomorphic stmts analysis
+
+Use option -ftree-slp-transpose-vectorize
+
+Check if loop is vectorizable before analysis. For unvectorizable
+loops, try to find isomorphic stmts from grouped load as new seed stmts
+for distribution.
+---
+ gcc/tree-loop-distribution.c | 858 +++++++++++++++++++++++++++++++++++
+ gcc/tree-vect-loop.c | 37 +-
+ gcc/tree-vectorizer.h | 3 +-
+ 3 files changed, 894 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
+index 888af4894..c08af6562 100644
+--- a/gcc/tree-loop-distribution.c
++++ b/gcc/tree-loop-distribution.c
+@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see
+ data reuse. */
+
+ #include "config.h"
++#define INCLUDE_MAP
++#define INCLUDE_ALGORITHM
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -115,6 +117,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "tree-vectorizer.h"
+ #include "tree-eh.h"
+ #include "gimple-fold.h"
++#include "optabs-tree.h"
+
+
+ #define MAX_DATAREFS_NUM \
+@@ -183,6 +186,52 @@ struct rdg_vertex
+ #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
+ #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
+
++/* Results of isomorphic group analysis. */
++#define UNINITIALIZED (0)
++#define ISOMORPHIC (1)
++#define HETEROGENEOUS (1 << 1)
++#define UNCERTAIN (1 << 2)
++
++/* Information of a stmt while analyzing isomorphic use in group. */
++
++typedef struct _group_info
++{
++ gimple *stmt;
++
++ /* True if stmt can be a cut point. */
++ bool cut_point;
++
++ /* For use_stmt with two rhses, one of which is the lhs of stmt.
++ If the other is unknown to be isomorphic, mark it uncertain. */
++ bool uncertain;
++
++ /* Searching of isomorphic stmt reaches heterogeneous groups or reaches
++ MEM stmts. */
++ bool done;
++
++ _group_info ()
++ {
++ stmt = NULL;
++ cut_point = false;
++ uncertain = false;
++ done = false;
++ }
++} *group_info;
++
++/* PAIR of cut points and corresponding profit. */
++typedef std::pair<vec<gimple *> *, int> stmts_profit;
++
++/* MAP of vector factor VF and corresponding stmts_profit PAIR. */
++typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map;
++
++/* PAIR of group_num and iteration_num. We consider rhses from the same
++   group and iteration are isomorphic.  */
++typedef std::pair<unsigned, unsigned> group_iteration;
++
++/* An isomorphic stmt is determined by lhs of use_stmt, group_num and
++ the iteration_num when we insert this stmt to this map. */
++typedef std::map<tree, group_iteration> isomer_stmt_lhs;
++
+ /* Data dependence type. */
+
+ enum rdg_dep_type
+@@ -640,6 +689,18 @@ class loop_distribution
+ void finalize_partitions (class loop *loop, vec<struct partition *>
+ *partitions, vec<ddr_p> *alias_ddrs);
+
++ /* Analyze loop form and if it's vectorizable to decide if we need to
++ insert temp arrays to distribute it. */
++ bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
++ control_dependences *cd);
++
++ /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1. */
++ void reset_gimple_uid (loop_p loop);
++
++ bool check_loop_vectorizable (loop_p loop);
++
++ inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
++ control_dependences *cd);
+ /* Distributes the code from LOOP in such a way that producer statements
+ are placed before consumer statements. Tries to separate only the
+ statements from STMTS into separate loops. Returns the number of
+@@ -2900,6 +2961,803 @@ loop_distribution::finalize_partitions (class loop *loop,
+ fuse_memset_builtins (partitions);
+ }
+
++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function
++ vect_analyze_loop, reset them to -1. */
++
++void
++loop_distribution::reset_gimple_uid (loop_p loop)
++{
++ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
++ bb_top_order_cmp_r);
++ for (int i = 0; i < int (loop->num_nodes); i++)
++ {
++ basic_block bb = bbs[i];
++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++ gsi_next (&gsi))
++ {
++ gimple *stmt = gsi_stmt (gsi);
++ if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL)
++ gimple_set_uid (stmt, -1);
++ }
++ }
++ free (bbs);
++}
++
++bool
++loop_distribution::check_loop_vectorizable (loop_p loop)
++{
++ vec_info_shared shared;
++ vect_analyze_loop (loop, &shared, true);
++ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
++ reset_gimple_uid (loop);
++ if (vinfo == NULL)
++ {
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file,
++ "Loop %d no temp array insertion: bad data access pattern,"
++ " unable to generate loop_vinfo.\n", loop->num);
++ return false;
++ }
++ if (vinfo->vectorizable)
++ {
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file, "Loop %d no temp array insertion: original loop"
++ " can be vectorized without distribution.\n",
++ loop->num);
++ delete vinfo;
++ loop->aux = NULL;
++ return false;
++ }
++ if (vinfo->grouped_loads.length () == 0)
++ {
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file, "Loop %d no temp array insertion: original loop"
++ " has no grouped loads.\n" , loop->num);
++ delete vinfo;
++ loop->aux = NULL;
++ return false;
++ }
++ return true;
++}
++
++inline void
++loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg,
++ control_dependences *cd)
++{
++ free_rdg (rdg);
++ rdg = build_rdg (loop, cd);
++ gcc_checking_assert (rdg != NULL);
++}
++
++bool
++loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
++ control_dependences *cd)
++{
++ if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize))
++ return false;
++
++ /* Only loops with two basic blocks HEADER and LATCH are supported. HEADER
++ is the main body of a LOOP and LATCH is the basic block that controls the
++ LOOP execution. Size of temp array is determined by loop execution time,
++ so it must be a const. */
++ tree loop_extent = number_of_latch_executions (loop);
++ if (loop->inner != NULL || loop->num_nodes > 2
++ || rdg->n_vertices > param_slp_max_insns_in_bb
++ || TREE_CODE (loop_extent) != INTEGER_CST)
++ {
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file, "Loop %d: no temp array insertion: bad loop"
++ " form.\n", loop->num);
++ return false;
++ }
++
++ if (loop->dont_vectorize)
++ {
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file, "Loop %d: no temp array insertion: this loop"
++ " should never be vectorized.\n",
++ loop->num);
++ return false;
++ }
++
++ /* Do not distribute a LOOP that is able to be vectorized without
++ distribution. */
++ if (!check_loop_vectorizable (loop))
++ {
++ rebuild_rdg (loop, rdg, cd);
++ return false;
++ }
++
++ rebuild_rdg (loop, rdg, cd);
++ return true;
++}
++
++/* Return max grouped loads' length if all groups' lengths satisfy len = 2 ^ n.
++ Otherwise, return 0. */
++
++static unsigned
++get_max_vf (loop_vec_info vinfo)
++{
++ unsigned size = 0;
++ unsigned max = 0;
++ stmt_vec_info stmt_info;
++ unsigned i = 0;
++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
++ {
++ size = stmt_info->size;
++ if (!pow2p_hwi (size))
++ return 0;
++ max = size > max ? size : max;
++ }
++ return max;
++}
++
++/* Convert grouped_loads from linked list to vector with length vf. Init
++   group_info of each stmt in the same group and put them into a vector.  And
++   these vectors constitute WORKLISTS.  We will re-analyze a group if it is
++ uncertain, so we regard WORKLISTS as a circular queue. */
++
++static unsigned
++build_queue (loop_vec_info vinfo, unsigned vf,
++ vec<vec<group_info> *> &worklists)
++{
++ stmt_vec_info stmt_info;
++ unsigned i = 0;
++ group_info ginfo = NULL;
++ vec<group_info> *worklist = NULL;
++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
++ {
++ unsigned group_size = stmt_info->size;
++ stmt_vec_info c_stmt_info = stmt_info;
++ while (group_size >= vf)
++ {
++ vec_alloc (worklist, vf);
++ for (unsigned j = 0; j < vf; ++j)
++ {
++ ginfo = new _group_info ();
++ ginfo->stmt = c_stmt_info->stmt;
++ worklist->safe_push (ginfo);
++ c_stmt_info = c_stmt_info->next_element;
++ }
++ worklists.safe_push (worklist);
++ group_size -= vf;
++ }
++ }
++ return worklists.length ();
++}
++
++static bool
++check_same_oprand_type (tree op1, tree op2)
++{
++ tree type1 = TREE_TYPE (op1);
++ tree type2 = TREE_TYPE (op2);
++ if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE)
++ return false;
++
++ return (TREE_CODE (type1) == TREE_CODE (type2)
++ && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2)
++ && TYPE_PRECISION (type1) == TYPE_PRECISION (type2));
++}
++
++static bool
++bit_field_p (gimple *stmt)
++{
++ unsigned i = 0;
++ auto_vec<data_reference_p, 2> datarefs_vec;
++ data_reference_p dr;
++ if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec))
++ return true;
++
++ FOR_EACH_VEC_ELT (datarefs_vec, i, dr)
++ {
++ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
++ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
++ return true;
++ }
++ return false;
++}
++
++static inline bool
++shift_operation (enum tree_code op)
++{
++ return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR
++ || op == RROTATE_EXPR;
++}
++
++/* Return relationship between USE_STMT and the first use_stmt of the group.
++ RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt
++ is not a constant, return UNCERTAIN and re-check it later. */
++
++static unsigned
++check_isomorphic (gimple *use_stmt, gimple *first,
++ tree rhs1, vec<tree> &hetero_lhs)
++{
++ /* Check same operation. */
++ enum tree_code rhs_code_first = gimple_assign_rhs_code (first);
++ enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt);
++ if (rhs_code_first != rhs_code_current)
++ return HETEROGENEOUS;
++
++  /* For shift operations, operands should be equal.  */
++ if (shift_operation (rhs_code_current))
++ {
++ tree shift_op_first = gimple_assign_rhs2 (first);
++ tree shift_op_current = gimple_assign_rhs2 (use_stmt);
++ if (!operand_equal_p (shift_op_first, shift_op_current, 0)
++ || !TREE_CONSTANT (shift_op_first))
++ return HETEROGENEOUS;
++
++ return ISOMORPHIC;
++ }
++  /* Type conversion expr or assignment.  */
++ if (gimple_num_ops (first) == 2)
++ return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR
++ || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS;
++
++ /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and
++ the other one as rhs2. Check if define-stmt of current rhs2 is isomorphic
++ with define-stmt of rhs2 in the first USE_STMT at this group. */
++ tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1
++ ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first);
++ tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1
++ ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt);
++
++ if (check_same_oprand_type (rhs2_first, rhs2_curr))
++ {
++ if (TREE_CONSTANT (rhs2_curr))
++ return ISOMORPHIC;
++ else if (hetero_lhs.contains (rhs2_curr))
++ return HETEROGENEOUS;
++
++ /* Provisionally set the stmt as uncertain and analyze the whole group
++ in function CHECK_UNCERTAIN later if all use_stmts are uncertain. */
++ return UNCERTAIN;
++ }
++ return HETEROGENEOUS;
++}
++
++static bool
++unsupported_operations (gimple *stmt)
++{
++ enum tree_code code = gimple_assign_rhs_code (stmt);
++ return code == COND_EXPR;
++}
++
++/* Check if the single use_stmt of STMT is isomorphic with the first one's
++ use_stmt in current group. */
++
++static unsigned
++check_use_stmt (group_info elmt, gimple *&first,
++ vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs)
++{
++ if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN)
++ return HETEROGENEOUS;
++ use_operand_p dummy;
++ tree lhs = gimple_assign_lhs (elmt->stmt);
++ gimple *use_stmt = NULL;
++ single_imm_use (lhs, &dummy, &use_stmt);
++ /* STMTs with three rhs are not supported, e.g., GIMPLE_COND. */
++ if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN
++ || unsupported_operations (use_stmt) || bit_field_p (use_stmt))
++ return HETEROGENEOUS;
++ tmp_stmts.safe_push (use_stmt);
++ if (first == NULL)
++ {
++ first = use_stmt;
++ return UNINITIALIZED;
++ }
++  /* Check if current use_stmt and the first member's use_stmt in the group
++ are of the same type. */
++ tree first_lhs = gimple_assign_lhs (first);
++ tree curr_lhs = gimple_assign_lhs (use_stmt);
++ if (!check_same_oprand_type (first_lhs, curr_lhs))
++ return HETEROGENEOUS;
++ return check_isomorphic (use_stmt, first, lhs, hetero_lhs);
++}
++
++/* Replace stmt field in group with stmts in TMP_STMTS, and insert their
++ lhs_info to ISOMER_LHS. */
++
++static void
++update_isomer_lhs (vec<group_info> *group, unsigned group_num,
++ unsigned iteration, isomer_stmt_lhs &isomer_lhs,
++ vec<gimple *> tmp_stmts, int &profit,
++ vec<unsigned> &merged_groups)
++{
++ group_info elmt = NULL;
++ /* Do not insert temp array if isomorphic stmts from grouped load have
++ only casting operations. Once isomorphic calculation has 3 oprands,
++ such as plus operation, this group can be regarded as cut point. */
++ bool operated = (gimple_num_ops (tmp_stmts[0]) == 3);
++  /* Do not insert temp arrays if search of isomorphic stmts reaches
++ MEM stmts. */
++ bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL;
++ bool merge = false;
++ for (unsigned i = 0; i < group->length (); i++)
++ {
++ elmt = (*group)[i];
++ elmt->stmt = has_vdef ? NULL : tmp_stmts[i];
++ elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated);
++ elmt->uncertain = false;
++ elmt->done = has_vdef;
++ tree lhs = gimple_assign_lhs (tmp_stmts[i]);
++ if (isomer_lhs.find (lhs) != isomer_lhs.end ())
++ {
++ merge = true;
++ continue;
++ }
++ isomer_lhs[lhs] = std::make_pair (group_num, iteration);
++ }
++ if (merge)
++ {
++ merged_groups.safe_push (group_num);
++ profit = 0;
++ return;
++ }
++ enum vect_cost_for_stmt kind = scalar_stmt;
++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++ profit = (tmp_stmts.length () - 1) * scalar_cost;
++}
++
++/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num
++ and iteration are same, GROUP is isomorphic. */
++
++static unsigned
++check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts,
++ isomer_stmt_lhs &isomer_lhs)
++{
++ group_info elmt = NULL;
++ gimple *stmt = NULL;
++ unsigned j = 0;
++ unsigned group_num = -1u;
++ unsigned iteration = -1u;
++ tree rhs1 = NULL;
++ tree rhs2 = NULL;
++ unsigned status = UNINITIALIZED;
++ FOR_EACH_VEC_ELT (*group, j, elmt)
++ {
++ rhs1 = gimple_assign_lhs (elmt->stmt);
++ stmt = tmp_stmts[j];
++ rhs2 = (rhs1 == gimple_assign_rhs1 (stmt))
++ ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
++ isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2);
++ if (iter != isomer_lhs.end ())
++ {
++ if (group_num == -1u)
++ {
++ group_num = iter->second.first;
++ iteration = iter->second.second;
++ status |= ISOMORPHIC;
++ continue;
++ }
++ if (iter->second.first == group_num
++ && iter->second.second == iteration)
++ {
++ status |= ISOMORPHIC;
++ continue;
++ }
++ return HETEROGENEOUS;
++ }
++ else
++ status |= UNCERTAIN;
++ }
++ return status;
++}
++
++/* Update group_info for uncertain groups. */
++
++static void
++update_uncertain_stmts (vec<group_info> *group, unsigned group_num,
++ unsigned iteration, vec<gimple *> &tmp_stmts)
++{
++ unsigned j = 0;
++ group_info elmt = NULL;
++ FOR_EACH_VEC_ELT (*group, j, elmt)
++ {
++ elmt->uncertain = true;
++ elmt->done = false;
++ }
++}
++
++/* Push stmts in TMP_STMTS into HETERO_LHS. */
++
++static void
++set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs,
++ vec<gimple *> &tmp_stmts)
++{
++ group_info elmt = NULL;
++ unsigned i = 0;
++ for (i = 0; i < group->length (); i++)
++ {
++ elmt = (*group)[i];
++ elmt->uncertain = false;
++ elmt->done = true;
++ }
++ gimple *stmt = NULL;
++ FOR_EACH_VEC_ELT (tmp_stmts, i, stmt)
++ if (stmt != NULL)
++ hetero_lhs.safe_push (gimple_assign_lhs (stmt));
++}
++
++/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP.
++ Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT.
++
++ Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num
++ and iteration, this uncertain group is isomorphic.
++
++ If no rhs matched, this GROUP remains uncertain and update group_info.
++
++ Otherwise, this GROUP is heterogeneous and return true to end analysis
++ for this group. */
++
++static bool
++check_uncertain (vec<group_info> *group, unsigned group_num,
++ unsigned iteration, int &profit,
++ vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs,
++ vec<tree> &hetero_lhs, vec<unsigned> &merged_groups)
++{
++ unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs);
++ bool done = false;
++ switch (status)
++ {
++ case UNCERTAIN:
++ update_uncertain_stmts (group, group_num, iteration, tmp_stmts);
++ break;
++ case ISOMORPHIC:
++ update_isomer_lhs (group, group_num, iteration, isomer_lhs,
++ tmp_stmts, profit, merged_groups);
++ break;
++ default:
++ set_hetero (group, hetero_lhs, tmp_stmts);
++ done = true;
++ }
++ return done;
++}
++
++/* Return false if analysis of this group is not finished, e.g., isomorphic or
++ uncertain. Calculate the profit if vectorized. */
++
++static bool
++check_group (vec<group_info> *group, unsigned group_num, unsigned iteration,
++ int &profit, vec<unsigned> &merged_groups,
++ isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs)
++{
++ unsigned j = 0;
++ group_info elmt = NULL;
++ gimple *first = NULL;
++ unsigned res = 0;
++  /* Record single use stmts in TMP_STMTS and decide whether to replace stmts
++ ginfo in succeeding processes. */
++ auto_vec<gimple *> tmp_stmts;
++ FOR_EACH_VEC_ELT (*group, j, elmt)
++ {
++ if (merged_groups.contains (group_num))
++ return true;
++ res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs);
++ }
++
++ /* Update each group member according to RES. */
++ switch (res)
++ {
++ case ISOMORPHIC:
++ update_isomer_lhs (group, group_num, iteration, isomer_lhs,
++ tmp_stmts, profit, merged_groups);
++ return false;
++ case UNCERTAIN:
++ return check_uncertain (group, group_num, iteration, profit,
++ tmp_stmts, isomer_lhs, hetero_lhs,
++ merged_groups);
++ default:
++ set_hetero (group, hetero_lhs, tmp_stmts);
++ return true;
++ }
++}
++
++/* Return true if all analyses are done except uncertain groups.  */
++
++static bool
++end_of_search (vec<vec<group_info> *> &circular_queue,
++ vec<unsigned> &merged_groups)
++{
++ unsigned i = 0;
++ vec<group_info> *group = NULL;
++ group_info elmt = NULL;
++ FOR_EACH_VEC_ELT (circular_queue, i, group)
++ {
++ if (merged_groups.contains (i))
++ continue;
++ elmt = (*group)[0];
++ /* If there is any isomorphic use_stmts, continue analysis of isomorphic
++ use_stmts. */
++ if (!elmt->done && !elmt->uncertain)
++ return false;
++ }
++ return true;
++}
++
++/* Push valid stmts to STMTS as cutpoints. */
++
++static bool
++check_any_cutpoints (vec<vec<group_info> *> &circular_queue,
++ vec<gimple *> *&stmts, vec<unsigned> &merged_groups)
++{
++ unsigned front = 0;
++ vec<group_info> *group = NULL;
++ group_info elmt = NULL;
++ unsigned max = circular_queue.length () * circular_queue[0]->length ();
++ vec_alloc (stmts, max);
++ while (front < circular_queue.length ())
++ {
++ unsigned i = 0;
++ if (merged_groups.contains (front))
++ {
++ front++;
++ continue;
++ }
++ group = circular_queue[front++];
++ FOR_EACH_VEC_ELT (*group, i, elmt)
++ if (elmt->stmt != NULL && elmt->done && elmt->cut_point)
++ stmts->safe_push (elmt->stmt);
++ }
++ return stmts->length () != 0;
++}
++
++/* Grouped loads are isomorphic. Make pair for group number and iteration,
++ map load stmt to this pair. We set iteration 0 here. */
++
++static void
++init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs)
++{
++ vec<group_info> *group = NULL;
++ group_info elmt = NULL;
++ unsigned i = 0;
++ FOR_EACH_VEC_ELT (groups, i, group)
++ {
++ unsigned j = 0;
++ FOR_EACH_VEC_ELT (*group, j, elmt)
++ isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0);
++ }
++}
++
++/* It's not a strict analysis of load/store profit. Assume scalar and vector
++   load/store are of the same cost.  The result PROFIT equals profit from
++ vectorizing of scalar loads/stores minus cost of a vectorized load/store. */
++
++static int
++load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops)
++{
++ int profit = 0;
++ enum vect_cost_for_stmt kind = scalar_load;
++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++ profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost;
++ profit -= new_mem_ops / vf * scalar_cost;
++ kind = scalar_store;
++ scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++ profit -= new_mem_ops / vf * scalar_cost;
++ return profit;
++}
++
++/* Breadth first search the graph consisting of define-use chain starting from
++ the circular queue initialized by function BUILD_QUEUE. Find single use of
++ each stmt in group and check if they are isomorphic. Isomorphic is defined
++ as same rhs type, same operator, and isomorphic calculation of each rhs
++ starting from load. If another rhs is uncertain to be isomorphic, put it
++ at the end of circular queue and re-analyze it during the next iteration.
++ If a group shares the same use_stmt with another group, skip one of them in
++ succeedor prcoesses as merged. Iterate the circular queue until all
++ remianing groups heterogeneous or reaches MEN stmts. If all other groups
++ have finishes the analysis, and the remaining groups are uncertain,
++ return false to avoid endless loop. */
++
++bool
++bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue,
++ stmts_profit &profit_pair, unsigned vf,
++ bool &reach_vdef)
++{
++ isomer_stmt_lhs isomer_lhs;
++ auto_vec<tree> hetero_lhs;
++ auto_vec<unsigned> merged_groups;
++ vec<group_info> *group = NULL;
++ /* True if analysis finishes. */
++ bool done = false;
++ int profit_sum = 0;
++ vec<gimple *> *stmts = NULL;
++ init_isomer_lhs (circular_queue, isomer_lhs);
++ for (unsigned i = 1; !done; ++i)
++ {
++ unsigned front = 0;
++ /* Re-initialize DONE to TRUE while a new iteration begins. */
++ done = true;
++ while (front < circular_queue.length ())
++ {
++ int profit = 0;
++ group = circular_queue[front];
++ done &= check_group (group, front, i, profit, merged_groups,
++ isomer_lhs, hetero_lhs);
++ profit_sum += profit;
++ if (profit != 0 && (*group)[0]->stmt == NULL)
++ {
++ reach_vdef = true;
++ return false;
++ }
++ ++front;
++ }
++ /* Uncertain result, return. */
++ if (!done && end_of_search (circular_queue, merged_groups))
++ return false;
++ }
++ if (check_any_cutpoints (circular_queue, stmts, merged_groups))
++ {
++ profit_pair.first = stmts;
++ unsigned loads = circular_queue.length () * circular_queue[0]->length ();
++ profit_pair.second = profit_sum + load_store_profit (loads, vf,
++ stmts->length ());
++ if (profit_pair.second > 0)
++ return true;
++ }
++ return false;
++}
++
++/* Free memory allocated by ginfo. */
++
++static void
++free_ginfos (vec<vec<group_info> *> &worklists)
++{
++ vec<group_info> *worklist;
++ unsigned i = 0;
++ while (i < worklists.length ())
++ {
++ worklist = worklists[i++];
++ group_info ginfo;
++ unsigned j = 0;
++ FOR_EACH_VEC_ELT (*worklist, j, ginfo)
++ delete ginfo;
++ }
++}
++
++static void
++release_tmp_stmts (vf_stmts_profit_map &candi_stmts)
++{
++ vf_stmts_profit_map::iterator iter;
++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++ iter->second.first->release ();
++}
++
++/* Choose the group of stmt with maximum profit.  */
++
++static bool
++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts)
++{
++ vf_stmts_profit_map::iterator iter;
++ int profit = 0;
++ int max = 0;
++ vec<gimple *> *tmp = NULL;
++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++ {
++ profit = iter->second.second;
++ if (profit > max)
++ {
++ tmp = iter->second.first;
++ max = profit;
++ }
++ }
++ if (max == 0)
++ {
++ release_tmp_stmts (candi_stmts);
++ return false;
++ }
++ unsigned i = 0;
++ gimple *stmt = NULL;
++ FOR_EACH_VEC_ELT (*tmp, i, stmt)
++ stmts.safe_push (stmt);
++ release_tmp_stmts (candi_stmts);
++ return stmts.length () != 0;
++}
++
++/* Find isomorphic stmts from grouped loads with vector factor VF.
++
++ Given source code as follows and ignore casting.
++
++ a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16);
++ a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16);
++ a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16);
++ a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16);
++
++ We get grouped loads in VINFO as
++
++ GROUP_1 GROUP_2
++ _1 = *a _11 = *b
++ _2 = *(a + 1) _12 = *(b + 1)
++ _3 = *(a + 2) _13 = *(b + 2)
++ _4 = *(a + 3) _14 = *(b + 3)
++ _5 = *(a + 4) _15 = *(b + 4)
++ _6 = *(a + 5) _16 = *(b + 5)
++ _7 = *(a + 6) _17 = *(b + 6)
++ _8 = *(a + 7) _18 = *(b + 7)
++
++ First we try VF = 8, we get two worklists
++
++ WORKLIST_1 WORKLIST_2
++ _1 = *a _11 = *b
++ _2 = *(a + 1) _12 = *(b + 1)
++ _3 = *(a + 2) _13 = *(b + 2)
++ _4 = *(a + 3) _14 = *(b + 3)
++ _5 = *(a + 4) _15 = *(b + 4)
++ _6 = *(a + 5) _16 = *(b + 5)
++ _7 = *(a + 6) _17 = *(b + 6)
++ _8 = *(a + 7) _18 = *(b + 7)
++
++ We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic,
++ so we try VF = VF / 2.
++
++ GROUP_1 GROUP_2
++ _1 = *a _5 = *(a + 4)
++ _2 = *(a + 1) _6 = *(a + 5)
++ _3 = *(a + 2) _7 = *(a + 6)
++ _4 = *(a + 3) _8 = *(a + 7)
++
++ GROUP_3 GROUP_4
++ _11 = *b _15 = *(b + 4)
++ _12 = *(b + 1) _16 = *(b + 5)
++ _13 = *(b + 2) _17 = *(b + 6)
++ _14 = *(b + 3) _18 = *(b + 7)
++
++ We first analyze group_1, and find all operations are isomorphic, then
++ replace stmts in group_1 with their use_stmts. Group_2 as well.
++
++ GROUP_1 GROUP_2
++ _111 = _1 + _11 _115 = _5 - _15
++ _112 = _2 + _12 _116 = _6 - _16
++ _113 = _3 + _13 _117 = _7 - _17
++ _114 = _4 + _14 _118 = _8 - _18
++
++ When analyzing group_3 and group_4, we find their use_stmts are the same
++ as group_1 and group_2. So group_3 is regarded as being merged to group_1
++ and group_4 being merged to group_2. In future procedures, we will skip
++ group_3 and group_4.
++
++   We repeat such processing until operations are not isomorphic or searching
++   reaches MEM stmts.  In our given case, searching ends up at a0, a1, a2 and
++ a3. */
++
++static bool
++find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
++{
++ unsigned vf = get_max_vf (vinfo);
++ if (vf == 0)
++ return false;
++ auto_vec<vec<group_info> *> circular_queue;
++ /* Map of vector factor and corresponding vectorizing profit. */
++ stmts_profit profit_map;
++ /* Map of cut_points and vector factor. */
++ vf_stmts_profit_map candi_stmts;
++ bool reach_vdef = false;
++ while (vf > 2)
++ {
++ if (build_queue (vinfo, vf, circular_queue) == 0)
++ return false;
++ if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef))
++ {
++ if (reach_vdef)
++ {
++ release_tmp_stmts (candi_stmts);
++ free_ginfos (circular_queue);
++ circular_queue.release ();
++ return false;
++ }
++ vf /= 2;
++ free_ginfos (circular_queue);
++ circular_queue.release ();
++ continue;
++ }
++ candi_stmts[vf] = profit_map;
++ free_ginfos (circular_queue);
++ vf /= 2;
++ circular_queue.release ();
++ }
++ return decide_stmts_by_profit (candi_stmts, stmts);
++}
++
+ /* Distributes the code from LOOP in such a way that producer statements
+ are placed before consumer statements. Tries to separate only the
+ statements from STMTS into separate loops. Returns the number of
+diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+index 7990e31de..1e332d3c5 100644
+--- a/gcc/tree-vect-loop.c
++++ b/gcc/tree-vect-loop.c
+@@ -2516,9 +2516,11 @@ vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts)
+
+ Apply a set of analyses on LOOP, and create a loop_vec_info struct
+ for it. The different analyses will record information in the
+- loop_vec_info struct. */
++ loop_vec_info struct. When RESULT_ONLY_P is true, quit analysis
++   if loop is vectorizable; otherwise, do not delete vinfo.  */
+ opt_loop_vec_info
+-vect_analyze_loop (class loop *loop, vec_info_shared *shared)
++vect_analyze_loop (class loop *loop, vec_info_shared *shared,
++ bool result_only_p)
+ {
+ auto_vector_modes vector_modes;
+
+@@ -2545,6 +2547,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+ unsigned n_stmts = 0;
+ machine_mode autodetected_vector_mode = VOIDmode;
+ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
++ /* Loop_vinfo for loop-distribution pass. */
++ opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL);
+ machine_mode next_vector_mode = VOIDmode;
+ poly_uint64 lowest_th = 0;
+ unsigned vectorized_loops = 0;
+@@ -2633,6 +2637,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+ if (res)
+ {
+ LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
++ /* In loop-distribution pass, we only need to get loop_vinfo, do not
++ conduct further operations. */
++ if (result_only_p)
++ {
++ loop->aux = (loop_vec_info) loop_vinfo;
++ return loop_vinfo;
++ }
+ vectorized_loops++;
+
+ /* Once we hit the desired simdlen for the first time,
+@@ -2724,7 +2735,19 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+ }
+ else
+ {
+- delete loop_vinfo;
++ /* If current analysis shows LOOP is unable to vectorize, loop_vinfo
++ will be deleted. If LOOP is under ldist analysis, backup it before
++ it is deleted and return it if all modes are analyzed and still
++ fail to vectorize. */
++ if (result_only_p && (mode_i == vector_modes.length ()
++ || autodetected_vector_mode == VOIDmode))
++ {
++ fail_loop_vinfo = loop_vinfo;
++ }
++ else
++ {
++ delete loop_vinfo;
++ }
+ if (fatal)
+ {
+ gcc_checking_assert (first_loop_vinfo == NULL);
+@@ -2773,6 +2796,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+ return first_loop_vinfo;
+ }
+
++ /* Return loop_vinfo for ldist if loop is unvectorizable. */
++ if (result_only_p && (mode_i == vector_modes.length ()
++ || autodetected_vector_mode == VOIDmode))
++ {
++ loop->aux = (loop_vec_info) fail_loop_vinfo;
++ return fail_loop_vinfo;
++ }
++
+ return opt_loop_vec_info::propagate_failure (res);
+ }
+
+diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+index 1c4a6c421..dc8175f00 100644
+--- a/gcc/tree-vectorizer.h
++++ b/gcc/tree-vectorizer.h
+@@ -1896,7 +1896,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
+ enum tree_code);
+ extern bool needs_fold_left_reduction_p (tree, tree_code);
+ /* Drive for loop analysis stage. */
+-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
++extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *,
++ bool result_only_p = false);
+ extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
+ extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
+ tree *, bool);
+--
+2.27.0.windows.1
+