Diffstat (limited to '0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch')
-rw-r--r--   0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch | 1007
1 file changed, 1007 insertions, 0 deletions
diff --git a/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch new file mode 100644 index 0000000..baeff90 --- /dev/null +++ b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch @@ -0,0 +1,1007 @@ +From d334ec1579fb0668da5e23ced3b782d7f6f35d77 Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji <linda7@huawei.com> +Date: Mon, 17 Oct 2022 17:21:57 +0800 +Subject: [PATCH 30/35] [Loop-distribution] Add isomorphic stmts analysis + +Use option -ftree-slp-transpose-vectorize + +Check if loop is vectorizable before analysis.  For unvectorizable +loops, try to find isomorphic stmts from grouped load as new seed stmts +for distribution. +--- + gcc/tree-loop-distribution.c | 858 +++++++++++++++++++++++++++++++++++ + gcc/tree-vect-loop.c         |  37 +- + gcc/tree-vectorizer.h        |   3 +- + 3 files changed, 894 insertions(+), 4 deletions(-) + +diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c +index 888af4894..c08af6562 100644 +--- a/gcc/tree-loop-distribution.c ++++ b/gcc/tree-loop-distribution.c +@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3.  If not see + 	data reuse.  */ +  + #include "config.h" ++#define INCLUDE_MAP ++#define INCLUDE_ALGORITHM + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -115,6 +117,7 @@ along with GCC; see the file COPYING3.  If not see + #include "tree-vectorizer.h" + #include "tree-eh.h" + #include "gimple-fold.h" ++#include "optabs-tree.h" +  +  + #define MAX_DATAREFS_NUM \ +@@ -183,6 +186,52 @@ struct rdg_vertex + #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) + #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) +  ++/* Results of isomorphic group analysis.  */ ++#define UNINITIALIZED	(0) ++#define ISOMORPHIC	(1) ++#define HETEROGENEOUS	(1 << 1) ++#define UNCERTAIN	(1 << 2) ++ ++/* Information of a stmt while analyzing isomorphic use in group.  */ ++ ++typedef struct _group_info ++{ ++  gimple *stmt; ++ ++  /* True if stmt can be a cut point.  */ ++  bool cut_point; ++ ++  /* For use_stmt with two rhses, one of which is the lhs of stmt. ++     If the other is unknown to be isomorphic, mark it uncertain.  */ ++  bool uncertain; ++ ++  /* Searching of isomorphic stmt reaches heterogeneous groups or reaches ++     MEM stmts.  */ ++  bool done; ++ ++  _group_info () ++    { ++      stmt = NULL; ++      cut_point = false; ++      uncertain = false; ++      done = false; ++    } ++} *group_info; ++ ++/* PAIR of cut points and corresponding profit.  */ ++typedef std::pair<vec<gimple *> *, int> stmts_profit; ++ ++/* MAP of vector factor VF and corresponding stmts_profit PAIR.  */ ++typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map; ++ ++/* PAIR of group_num and iteration_num.  We consider rhses from the same ++   group and interation are isomorphic.  */ ++typedef std::pair<unsigned, unsigned> group_iteration; ++ ++/* An isomorphic stmt is detetmined by lhs of use_stmt, group_num and ++   the iteration_num when we insert this stmt to this map.  */ ++typedef std::map<tree, group_iteration> isomer_stmt_lhs; ++ + /* Data dependence type.  */ +  + enum rdg_dep_type +@@ -640,6 +689,18 @@ class loop_distribution +   void finalize_partitions (class loop *loop, vec<struct partition *> + 			    *partitions, vec<ddr_p> *alias_ddrs); +  ++  /* Analyze loop form and if it's vectorizable to decide if we need to ++     insert temp arrays to distribute it.  
*/ ++  bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++			       control_dependences *cd); ++ ++  /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1.  */ ++  void reset_gimple_uid (loop_p loop); ++ ++  bool check_loop_vectorizable (loop_p loop); ++ ++  inline void rebuild_rdg (loop_p loop, struct graph *&rdg, ++			   control_dependences *cd); +   /* Distributes the code from LOOP in such a way that producer statements +      are placed before consumer statements.  Tries to separate only the +      statements from STMTS into separate loops.  Returns the number of +@@ -2900,6 +2961,803 @@ loop_distribution::finalize_partitions (class loop *loop, +     fuse_memset_builtins (partitions); + } +  ++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function ++   vect_analyze_loop, reset them to -1.  */ ++ ++void ++loop_distribution::reset_gimple_uid (loop_p loop) ++{ ++  basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++						    bb_top_order_cmp_r); ++  for (int i = 0; i < int (loop->num_nodes); i++) ++    { ++      basic_block bb = bbs[i]; ++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++	   gsi_next (&gsi)) ++	{ ++	  gimple *stmt = gsi_stmt (gsi); ++	  if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL) ++	    gimple_set_uid (stmt, -1); ++	} ++    } ++  free (bbs); ++} ++ ++bool ++loop_distribution::check_loop_vectorizable (loop_p loop) ++{ ++  vec_info_shared shared; ++  vect_analyze_loop (loop, &shared, true); ++  loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++  reset_gimple_uid (loop); ++  if (vinfo == NULL) ++    { ++      if (dump_file && (dump_flags & TDF_DETAILS)) ++	fprintf (dump_file, ++		 "Loop %d no temp array insertion: bad data access pattern," ++		 " unable to generate loop_vinfo.\n", loop->num); ++      return false; ++    } ++  if (vinfo->vectorizable) ++    { ++      if (dump_file && (dump_flags & TDF_DETAILS)) ++	fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++			    " can be vectorized without distribution.\n", ++			    loop->num); ++      delete vinfo; ++      loop->aux = NULL; ++      return false; ++    } ++  if (vinfo->grouped_loads.length () == 0) ++    { ++      if (dump_file && (dump_flags & TDF_DETAILS)) ++	fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++			    " has no grouped loads.\n" , loop->num); ++      delete vinfo; ++      loop->aux = NULL; ++      return false; ++    } ++  return true; ++} ++ ++inline void ++loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg, ++				control_dependences *cd) ++{ ++  free_rdg (rdg); ++  rdg = build_rdg (loop, cd); ++  gcc_checking_assert (rdg != NULL); ++} ++ ++bool ++loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++					   control_dependences *cd) ++{ ++  if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize)) ++    return false; ++ ++  /* Only loops with two basic blocks HEADER and LATCH are supported.  HEADER ++     is the main body of a LOOP and LATCH is the basic block that controls the ++     LOOP execution.  Size of temp array is determined by loop execution time, ++     so it must be a const.  
*/ ++  tree loop_extent = number_of_latch_executions (loop); ++  if (loop->inner != NULL || loop->num_nodes > 2 ++      || rdg->n_vertices > param_slp_max_insns_in_bb ++      || TREE_CODE (loop_extent) != INTEGER_CST) ++    { ++      if (dump_file && (dump_flags & TDF_DETAILS)) ++	fprintf (dump_file, "Loop %d: no temp array insertion: bad loop" ++			    " form.\n", loop->num); ++      return false; ++    } ++ ++  if (loop->dont_vectorize) ++    { ++      if (dump_file && (dump_flags & TDF_DETAILS)) ++	fprintf (dump_file, "Loop %d: no temp array insertion: this loop" ++			    " should never be vectorized.\n", ++			    loop->num); ++      return false; ++    } ++ ++  /* Do not distribute a LOOP that is able to be vectorized without ++     distribution.  */ ++  if (!check_loop_vectorizable (loop)) ++    { ++      rebuild_rdg (loop, rdg, cd); ++      return false; ++    } ++ ++  rebuild_rdg (loop, rdg, cd); ++  return true; ++} ++ ++/* Return max grouped loads' length if all groupes length satisfy len = 2 ^ n. ++   Otherwise, return 0.  */ ++ ++static unsigned ++get_max_vf (loop_vec_info vinfo) ++{ ++  unsigned size = 0; ++  unsigned max = 0; ++  stmt_vec_info stmt_info; ++  unsigned i = 0; ++  FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++    { ++      size = stmt_info->size; ++      if (!pow2p_hwi (size)) ++	return 0; ++      max = size > max ? size : max; ++    } ++  return max; ++} ++ ++/* Convert grouped_loads from linked list to vector with length vf.  Init ++   group_info of each stmt in the same group and put then into a vector.  And ++   these vectors consist WORKLISTS.  We will re-analyze a group if it is ++   uncertain, so we regard WORKLISTS as a circular queue.  */ ++ ++static unsigned ++build_queue (loop_vec_info vinfo, unsigned vf, ++	     vec<vec<group_info> *> &worklists) ++{ ++  stmt_vec_info stmt_info; ++  unsigned i = 0; ++  group_info ginfo = NULL; ++  vec<group_info> *worklist = NULL; ++  FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++    { ++      unsigned group_size = stmt_info->size; ++      stmt_vec_info c_stmt_info = stmt_info; ++      while (group_size >= vf) ++	{ ++	  vec_alloc (worklist, vf); ++	  for (unsigned j = 0; j < vf; ++j) ++	    { ++	      ginfo = new _group_info (); ++	      ginfo->stmt = c_stmt_info->stmt; ++	      worklist->safe_push (ginfo); ++	      c_stmt_info = c_stmt_info->next_element; ++	    } ++	  worklists.safe_push (worklist); ++	  group_size -= vf; ++	} ++    } ++  return worklists.length (); ++} ++ ++static bool ++check_same_oprand_type (tree op1, tree op2) ++{ ++  tree type1 = TREE_TYPE (op1); ++  tree type2 = TREE_TYPE (op2); ++  if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE) ++    return false; ++ ++  return (TREE_CODE (type1) == TREE_CODE (type2) ++	  && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2) ++	  && TYPE_PRECISION (type1) == TYPE_PRECISION (type2)); ++} ++ ++static bool ++bit_field_p (gimple *stmt) ++{ ++  unsigned i = 0; ++  auto_vec<data_reference_p, 2> datarefs_vec; ++  data_reference_p dr; ++  if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec)) ++    return true; ++ ++  FOR_EACH_VEC_ELT (datarefs_vec, i, dr) ++    { ++      if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF ++	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) ++	return true; ++    } ++  return false; ++} ++ ++static inline bool ++shift_operation (enum tree_code op) ++{ ++  return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR ++	 || op == RROTATE_EXPR; ++} ++ ++/* Return relationship between 
USE_STMT and the first use_stmt of the group. ++   RHS1 is the lhs of stmt recorded in group_info.  If another rhs of use_stmt ++   is not a constant, return UNCERTAIN and re-check it later.  */ ++ ++static unsigned ++check_isomorphic (gimple *use_stmt, gimple *first, ++		  tree rhs1, vec<tree> &hetero_lhs) ++{ ++  /* Check same operation.  */ ++  enum tree_code rhs_code_first = gimple_assign_rhs_code (first); ++  enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt); ++  if (rhs_code_first != rhs_code_current) ++    return HETEROGENEOUS; ++ ++  /* For shift operations, oprands should be equal.  */ ++  if (shift_operation (rhs_code_current)) ++    { ++      tree shift_op_first = gimple_assign_rhs2 (first); ++      tree shift_op_current = gimple_assign_rhs2 (use_stmt); ++      if (!operand_equal_p (shift_op_first, shift_op_current, 0) ++	  || !TREE_CONSTANT (shift_op_first)) ++	return HETEROGENEOUS; ++ ++      return ISOMORPHIC; ++    } ++  /* Type convertion expr or assignment.  */ ++  if (gimple_num_ops (first) == 2) ++    return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR ++	      || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS; ++ ++  /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and ++     the other one as rhs2.  Check if define-stmt of current rhs2 is isomorphic ++     with define-stmt of rhs2 in the first USE_STMT at this group.  */ ++  tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1 ++		    ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first); ++  tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1 ++	      ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt); ++ ++  if (check_same_oprand_type (rhs2_first, rhs2_curr)) ++    { ++      if (TREE_CONSTANT (rhs2_curr)) ++	return ISOMORPHIC; ++      else if (hetero_lhs.contains (rhs2_curr)) ++	return HETEROGENEOUS; ++ ++      /* Provisionally set the stmt as uncertain and analyze the whole group ++	 in function CHECK_UNCERTAIN later if all use_stmts are uncertain.  */ ++      return UNCERTAIN; ++    } ++  return HETEROGENEOUS; ++} ++ ++static bool ++unsupported_operations (gimple *stmt) ++{ ++  enum tree_code code = gimple_assign_rhs_code (stmt); ++  return code == COND_EXPR; ++} ++ ++/* Check if the single use_stmt of STMT is isomorphic with the first one's ++   use_stmt in current group.  */ ++ ++static unsigned ++check_use_stmt (group_info elmt, gimple *&first, ++		vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs) ++{ ++  if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN) ++    return HETEROGENEOUS; ++  use_operand_p dummy; ++  tree lhs = gimple_assign_lhs (elmt->stmt); ++  gimple *use_stmt = NULL; ++  single_imm_use (lhs, &dummy, &use_stmt); ++  /* STMTs with three rhs are not supported, e.g., GIMPLE_COND.  */ ++  if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN ++      || unsupported_operations (use_stmt) || bit_field_p (use_stmt)) ++    return HETEROGENEOUS; ++  tmp_stmts.safe_push (use_stmt); ++  if (first == NULL) ++    { ++      first = use_stmt; ++      return UNINITIALIZED; ++    } ++  /* Check if current use_stmt and the first menber's use_stmt in the group ++     are of the same type.  
*/ ++  tree first_lhs = gimple_assign_lhs (first); ++  tree curr_lhs = gimple_assign_lhs (use_stmt); ++  if (!check_same_oprand_type (first_lhs, curr_lhs)) ++    return HETEROGENEOUS; ++  return check_isomorphic (use_stmt, first, lhs, hetero_lhs); ++} ++ ++/* Replace stmt field in group with stmts in TMP_STMTS, and insert their ++   lhs_info to ISOMER_LHS.  */ ++ ++static void ++update_isomer_lhs (vec<group_info> *group, unsigned group_num, ++		   unsigned iteration, isomer_stmt_lhs &isomer_lhs, ++		   vec<gimple *> tmp_stmts, int &profit, ++		   vec<unsigned> &merged_groups) ++{ ++  group_info elmt = NULL; ++  /* Do not insert temp array if isomorphic stmts from grouped load have ++     only casting operations.  Once isomorphic calculation has 3 oprands, ++     such as plus operation, this group can be regarded as cut point.  */ ++  bool operated = (gimple_num_ops (tmp_stmts[0]) == 3); ++  /* Do not insert temp arrays if search of iosomophic stmts reaches ++     MEM stmts.  */ ++  bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL; ++  bool merge = false; ++  for (unsigned i = 0; i < group->length (); i++) ++    { ++      elmt = (*group)[i]; ++      elmt->stmt = has_vdef ? NULL : tmp_stmts[i]; ++      elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated); ++      elmt->uncertain = false; ++      elmt->done = has_vdef; ++      tree lhs = gimple_assign_lhs (tmp_stmts[i]); ++      if (isomer_lhs.find (lhs) != isomer_lhs.end ()) ++	{ ++	  merge = true; ++	  continue; ++	} ++      isomer_lhs[lhs] = std::make_pair (group_num, iteration); ++    } ++  if (merge) ++    { ++      merged_groups.safe_push (group_num); ++      profit = 0; ++      return; ++    } ++  enum vect_cost_for_stmt kind = scalar_stmt; ++  int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++  profit = (tmp_stmts.length () - 1) * scalar_cost; ++} ++ ++/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num ++   and iteration are same, GROUP is isomorphic.  */ ++ ++static unsigned ++check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts, ++		      isomer_stmt_lhs &isomer_lhs) ++{ ++  group_info elmt = NULL; ++  gimple *stmt = NULL; ++  unsigned j = 0; ++  unsigned group_num = -1u; ++  unsigned iteration = -1u; ++  tree rhs1 = NULL; ++  tree rhs2 = NULL; ++  unsigned status = UNINITIALIZED; ++  FOR_EACH_VEC_ELT (*group, j, elmt) ++    { ++      rhs1 = gimple_assign_lhs (elmt->stmt); ++      stmt = tmp_stmts[j]; ++      rhs2 = (rhs1 == gimple_assign_rhs1 (stmt)) ++	     ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); ++      isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2); ++      if (iter != isomer_lhs.end ()) ++	{ ++	  if (group_num == -1u) ++	    { ++	      group_num = iter->second.first; ++	      iteration = iter->second.second; ++	      status |= ISOMORPHIC; ++	      continue; ++	    } ++	  if (iter->second.first == group_num ++	      && iter->second.second == iteration) ++	    { ++	      status |= ISOMORPHIC; ++	      continue; ++	    } ++	  return HETEROGENEOUS; ++	} ++      else ++	status |= UNCERTAIN; ++    } ++  return status; ++} ++ ++/* Update group_info for uncertain groups.  */ ++ ++static void ++update_uncertain_stmts (vec<group_info> *group, unsigned group_num, ++			 unsigned iteration, vec<gimple *> &tmp_stmts) ++{ ++  unsigned j = 0; ++  group_info elmt = NULL; ++  FOR_EACH_VEC_ELT (*group, j, elmt) ++    { ++      elmt->uncertain = true; ++      elmt->done = false; ++    } ++} ++ ++/* Push stmts in TMP_STMTS into HETERO_LHS.  
*/ ++ ++static void ++set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs, ++	    vec<gimple *> &tmp_stmts) ++{ ++  group_info elmt = NULL; ++  unsigned i = 0; ++  for (i = 0; i < group->length (); i++) ++    { ++      elmt = (*group)[i]; ++      elmt->uncertain = false; ++      elmt->done = true; ++    } ++  gimple *stmt = NULL; ++  FOR_EACH_VEC_ELT (tmp_stmts, i, stmt) ++    if (stmt != NULL) ++      hetero_lhs.safe_push (gimple_assign_lhs (stmt)); ++} ++ ++/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP. ++   Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT. ++ ++   Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num ++   and iteration, this uncertain group is isomorphic. ++ ++   If no rhs matched, this GROUP remains uncertain and update group_info. ++ ++   Otherwise, this GROUP is heterogeneous and return true to end analysis ++   for this group.  */ ++ ++static bool ++check_uncertain (vec<group_info> *group, unsigned group_num, ++		 unsigned iteration, int &profit, ++		 vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs, ++		 vec<tree> &hetero_lhs, vec<unsigned> &merged_groups) ++{ ++  unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs); ++  bool done = false; ++  switch (status) ++    { ++      case UNCERTAIN: ++	update_uncertain_stmts (group, group_num, iteration, tmp_stmts); ++	break; ++      case ISOMORPHIC: ++	update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++			   tmp_stmts, profit, merged_groups); ++	break; ++      default: ++	set_hetero (group, hetero_lhs, tmp_stmts); ++	done = true; ++    } ++  return done; ++} ++ ++/* Return false if analysis of this group is not finished, e.g., isomorphic or ++   uncertain.  Calculate the profit if vectorized.  */ ++ ++static bool ++check_group (vec<group_info> *group, unsigned group_num, unsigned iteration, ++	     int &profit, vec<unsigned> &merged_groups, ++	     isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs) ++{ ++  unsigned j = 0; ++  group_info elmt = NULL; ++  gimple *first = NULL; ++  unsigned res = 0; ++  /* Record single use stmts in TMP_STMTS and decide whether replace stmts in ++     ginfo in succeeding processes.  */ ++  auto_vec<gimple *> tmp_stmts; ++  FOR_EACH_VEC_ELT (*group, j, elmt) ++    { ++      if (merged_groups.contains (group_num)) ++	return true; ++      res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs); ++    } ++ ++  /* Update each group member according to RES.  */ ++  switch (res) ++    { ++      case ISOMORPHIC: ++	update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++			   tmp_stmts, profit, merged_groups); ++	return false; ++      case UNCERTAIN: ++	return check_uncertain (group, group_num, iteration, profit, ++				tmp_stmts, isomer_lhs, hetero_lhs, ++				merged_groups); ++      default: ++	set_hetero (group, hetero_lhs, tmp_stmts); ++	return true; ++    } ++} ++ ++/* Return true if all analysises are done except uncertain groups.  */ ++ ++static bool ++end_of_search (vec<vec<group_info> *> &circular_queue, ++	       vec<unsigned> &merged_groups) ++{ ++  unsigned i = 0; ++  vec<group_info> *group = NULL; ++  group_info elmt = NULL; ++  FOR_EACH_VEC_ELT (circular_queue, i, group) ++    { ++      if (merged_groups.contains (i)) ++	continue; ++      elmt = (*group)[0]; ++      /* If there is any isomorphic use_stmts, continue analysis of isomorphic ++	 use_stmts.  
*/ ++      if (!elmt->done && !elmt->uncertain) ++	return false; ++    } ++  return true; ++} ++ ++/* Push valid stmts to STMTS as cutpoints.  */ ++ ++static bool ++check_any_cutpoints (vec<vec<group_info> *> &circular_queue, ++		     vec<gimple *> *&stmts, vec<unsigned> &merged_groups) ++{ ++  unsigned front = 0; ++  vec<group_info> *group = NULL; ++  group_info elmt = NULL; ++  unsigned max = circular_queue.length () * circular_queue[0]->length (); ++  vec_alloc (stmts, max); ++  while (front < circular_queue.length ()) ++    { ++      unsigned i = 0; ++      if (merged_groups.contains (front)) ++	{ ++	  front++; ++	  continue; ++	} ++      group = circular_queue[front++]; ++      FOR_EACH_VEC_ELT (*group, i, elmt) ++	if (elmt->stmt != NULL && elmt->done && elmt->cut_point) ++	  stmts->safe_push (elmt->stmt); ++    } ++  return stmts->length () != 0; ++} ++ ++/* Grouped loads are isomorphic.  Make pair for group number and iteration, ++   map load stmt to this pair.  We set iteration 0 here.  */ ++ ++static void ++init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs) ++{ ++  vec<group_info> *group = NULL; ++  group_info elmt = NULL; ++  unsigned i = 0; ++  FOR_EACH_VEC_ELT (groups, i, group) ++    { ++      unsigned j = 0; ++      FOR_EACH_VEC_ELT (*group, j, elmt) ++	isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0); ++    } ++} ++ ++/* It's not a strict analysis of load/store profit.  Assume scalar and vector ++   load/store are of the same cost.  The result PROFIT equals profit form ++   vectorizing of scalar loads/stores minus cost of a vectorized load/store.  */ ++ ++static int ++load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops) ++{ ++  int profit = 0; ++  enum vect_cost_for_stmt kind = scalar_load; ++  int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++  profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost; ++  profit -= new_mem_ops / vf * scalar_cost; ++  kind = scalar_store; ++  scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++  profit -= new_mem_ops / vf * scalar_cost; ++  return profit; ++} ++ ++/* Breadth first search the graph consisting of define-use chain starting from ++   the circular queue initialized by function BUILD_QUEUE.  Find single use of ++   each stmt in group and check if they are isomorphic.  Isomorphic is defined ++   as same rhs type, same operator, and isomorphic calculation of each rhs ++   starting from load.  If another rhs is uncertain to be isomorphic, put it ++   at the end of circular queue and re-analyze it during the next iteration. ++   If a group shares the same use_stmt with another group, skip one of them in ++   succeedor prcoesses as merged.  Iterate the circular queue until all ++   remianing groups heterogeneous or reaches MEN stmts.  If all other groups ++   have finishes the analysis, and the remaining groups are uncertain, ++   return false to avoid endless loop.  */ ++ ++bool ++bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue, ++		       stmts_profit &profit_pair, unsigned vf, ++		       bool &reach_vdef) ++{ ++  isomer_stmt_lhs isomer_lhs; ++  auto_vec<tree> hetero_lhs; ++  auto_vec<unsigned> merged_groups; ++  vec<group_info> *group = NULL; ++  /* True if analysis finishes.  
*/ ++  bool done = false; ++  int profit_sum = 0; ++  vec<gimple *> *stmts = NULL; ++  init_isomer_lhs (circular_queue, isomer_lhs); ++  for (unsigned i = 1; !done; ++i) ++    { ++      unsigned front = 0; ++      /* Re-initialize DONE to TRUE while a new iteration begins.  */ ++      done = true; ++      while (front < circular_queue.length ()) ++	{ ++	  int profit = 0; ++	  group = circular_queue[front]; ++	  done &= check_group (group, front, i, profit, merged_groups, ++			       isomer_lhs, hetero_lhs); ++	  profit_sum += profit; ++	  if (profit != 0 && (*group)[0]->stmt == NULL) ++	    { ++	      reach_vdef = true; ++	      return false; ++	    } ++	  ++front; ++	} ++      /* Uncertain result, return.  */ ++      if (!done && end_of_search (circular_queue, merged_groups)) ++	return false; ++    } ++  if (check_any_cutpoints (circular_queue, stmts, merged_groups)) ++    { ++      profit_pair.first = stmts; ++      unsigned loads = circular_queue.length () * circular_queue[0]->length (); ++      profit_pair.second = profit_sum + load_store_profit (loads, vf, ++							   stmts->length ()); ++      if (profit_pair.second > 0) ++	return true; ++    } ++  return false; ++} ++ ++/* Free memory allocated by ginfo.  */ ++ ++static void ++free_ginfos (vec<vec<group_info> *> &worklists) ++{ ++  vec<group_info> *worklist; ++  unsigned i = 0; ++  while (i < worklists.length ()) ++    { ++      worklist = worklists[i++]; ++      group_info ginfo; ++      unsigned j = 0; ++      FOR_EACH_VEC_ELT (*worklist, j, ginfo) ++	delete ginfo; ++    } ++} ++ ++static void ++release_tmp_stmts (vf_stmts_profit_map &candi_stmts) ++{ ++  vf_stmts_profit_map::iterator iter; ++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++    iter->second.first->release (); ++} ++ ++/* Choose the group of stmt with maximun profit.  */ ++ ++static bool ++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts) ++{ ++  vf_stmts_profit_map::iterator iter; ++  int profit = 0; ++  int max = 0; ++  vec<gimple *> *tmp = NULL; ++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++    { ++      profit = iter->second.second; ++      if (profit > max) ++	{ ++	  tmp = iter->second.first; ++	  max = profit; ++	} ++    } ++  if (max == 0) ++    { ++      release_tmp_stmts (candi_stmts); ++      return false; ++    } ++  unsigned i = 0; ++  gimple *stmt = NULL; ++  FOR_EACH_VEC_ELT (*tmp, i, stmt) ++    stmts.safe_push (stmt); ++  release_tmp_stmts (candi_stmts); ++  return stmts.length () != 0; ++} ++ ++/* Find isomorphic stmts from grouped loads with vector factor VF. ++ ++   Given source code as follows and ignore casting. 
++ ++   a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16); ++   a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16); ++   a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16); ++   a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16); ++ ++   We get grouped loads in VINFO as ++ ++   GROUP_1		GROUP_2 ++   _1 = *a		_11 = *b ++   _2 = *(a + 1)	_12 = *(b + 1) ++   _3 = *(a + 2)	_13 = *(b + 2) ++   _4 = *(a + 3)	_14 = *(b + 3) ++   _5 = *(a + 4)	_15 = *(b + 4) ++   _6 = *(a + 5)	_16 = *(b + 5) ++   _7 = *(a + 6)	_17 = *(b + 6) ++   _8 = *(a + 7)	_18 = *(b + 7) ++ ++   First we try VF = 8, we get two worklists ++ ++   WORKLIST_1		WORKLIST_2 ++   _1 = *a		_11 = *b ++   _2 = *(a + 1)	_12 = *(b + 1) ++   _3 = *(a + 2)	_13 = *(b + 2) ++   _4 = *(a + 3)	_14 = *(b + 3) ++   _5 = *(a + 4)	_15 = *(b + 4) ++   _6 = *(a + 5)	_16 = *(b + 5) ++   _7 = *(a + 6)	_17 = *(b + 6) ++   _8 = *(a + 7)	_18 = *(b + 7) ++ ++   We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic, ++   so we try VF = VF / 2. ++ ++   GROUP_1		GROUP_2 ++   _1 = *a		_5 = *(a + 4) ++   _2 = *(a + 1)	_6 = *(a + 5) ++   _3 = *(a + 2)	_7 = *(a + 6) ++   _4 = *(a + 3)	_8 = *(a + 7) ++ ++   GROUP_3		GROUP_4 ++   _11 = *b		_15 = *(b + 4) ++   _12 = *(b + 1)	_16 = *(b + 5) ++   _13 = *(b + 2)	_17 = *(b + 6) ++   _14 = *(b + 3)	_18 = *(b + 7) ++ ++   We first analyze group_1, and find all operations are isomorphic, then ++   replace stmts in group_1 with their use_stmts.  Group_2 as well. ++ ++   GROUP_1		GROUP_2 ++   _111 = _1 + _11	_115 = _5 - _15 ++   _112 = _2 + _12	_116 = _6 - _16 ++   _113 = _3 + _13	_117 = _7 - _17 ++   _114 = _4 + _14	_118 = _8 - _18 ++ ++   When analyzing group_3 and group_4, we find their use_stmts are the same ++   as group_1 and group_2.  So group_3 is regarded as being merged to group_1 ++   and group_4 being merged to group_2.  In future procedures, we will skip ++   group_3 and group_4. ++ ++   We repeat such processing until opreations are not isomorphic or searching ++   reaches MEM stmts.  In our given case, searching end up at a0, a1, a2 and ++   a3.  */ ++ ++static bool ++find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts) ++{ ++  unsigned vf = get_max_vf (vinfo); ++  if (vf == 0) ++    return false; ++  auto_vec<vec<group_info> *> circular_queue; ++  /* Map of vector factor and corresponding vectorizing profit.  */ ++  stmts_profit profit_map; ++  /* Map of cut_points and vector factor.  */ ++  vf_stmts_profit_map candi_stmts; ++  bool reach_vdef = false; ++  while (vf > 2) ++    { ++      if (build_queue (vinfo, vf, circular_queue) == 0) ++	return false; ++      if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef)) ++	{ ++	  if (reach_vdef) ++	    { ++	      release_tmp_stmts (candi_stmts); ++	      free_ginfos (circular_queue); ++	      circular_queue.release (); ++	      return false; ++	    } ++	  vf /= 2; ++	  free_ginfos (circular_queue); ++	  circular_queue.release (); ++	  continue; ++	} ++      candi_stmts[vf] = profit_map; ++      free_ginfos (circular_queue); ++      vf /= 2; ++      circular_queue.release (); ++    } ++  return decide_stmts_by_profit (candi_stmts, stmts); ++} ++ + /* Distributes the code from LOOP in such a way that producer statements +    are placed before consumer statements.  Tries to separate only the +    statements from STMTS into separate loops.  
Returns the number of +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 7990e31de..1e332d3c5 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -2516,9 +2516,11 @@ vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts) +  +    Apply a set of analyses on LOOP, and create a loop_vec_info struct +    for it.  The different analyses will record information in the +-   loop_vec_info struct.  */ ++   loop_vec_info struct.  When RESULT_ONLY_P is true, quit analysis ++   if loop is vectorizable, otherwise, do not delete vinfo.*/ + opt_loop_vec_info +-vect_analyze_loop (class loop *loop, vec_info_shared *shared) ++vect_analyze_loop (class loop *loop, vec_info_shared *shared, ++		   bool result_only_p) + { +   auto_vector_modes vector_modes; +  +@@ -2545,6 +2547,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) +   unsigned n_stmts = 0; +   machine_mode autodetected_vector_mode = VOIDmode; +   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); ++  /* Loop_vinfo for loop-distribution pass.  */ ++  opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL); +   machine_mode next_vector_mode = VOIDmode; +   poly_uint64 lowest_th = 0; +   unsigned vectorized_loops = 0; +@@ -2633,6 +2637,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) +       if (res) + 	{ + 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; ++	  /* In loop-distribution pass, we only need to get loop_vinfo, do not ++	     conduct further operations.  */ ++	  if (result_only_p) ++	    { ++	      loop->aux = (loop_vec_info) loop_vinfo; ++	      return loop_vinfo; ++	    } + 	  vectorized_loops++; +  + 	  /* Once we hit the desired simdlen for the first time, +@@ -2724,7 +2735,19 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + 	} +       else + 	{ +-	  delete loop_vinfo; ++	  /* If current analysis shows LOOP is unable to vectorize, loop_vinfo ++	     will be deleted.  If LOOP is under ldist analysis, backup it before ++	     it is deleted and return it if all modes are analyzed and still ++	     fail to vectorize.  */ ++	  if (result_only_p && (mode_i == vector_modes.length () ++	      || autodetected_vector_mode == VOIDmode)) ++	    { ++	      fail_loop_vinfo = loop_vinfo; ++	    } ++	  else ++	    { ++	      delete loop_vinfo; ++	    } + 	  if (fatal) + 	    { + 	      gcc_checking_assert (first_loop_vinfo == NULL); +@@ -2773,6 +2796,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) +       return first_loop_vinfo; +     } +  ++  /* Return loop_vinfo for ldist if loop is unvectorizable.  */ ++  if (result_only_p && (mode_i == vector_modes.length () ++      || autodetected_vector_mode == VOIDmode)) ++    { ++      loop->aux = (loop_vec_info) fail_loop_vinfo; ++      return fail_loop_vinfo; ++    } ++ +   return opt_loop_vec_info::propagate_failure (res); + } +  +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 1c4a6c421..dc8175f00 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -1896,7 +1896,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, + 				  enum tree_code); + extern bool needs_fold_left_reduction_p (tree, tree_code); + /* Drive for loop analysis stage.  
*/ +-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); ++extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *, ++					    bool result_only_p = false); + extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); + extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, + 					 tree *, bool); +--  +2.27.0.windows.1 +
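
To see at the source level what the analysis above is preparing for, the C sketch below mirrors the worked example in the comment above find_isomorphic_stmts. It is an illustration only: the array shapes, the constant trip count N, and the placement of the temporaries sum[][] and dif[][] are assumptions, not output of the pass, which works on GIMPLE and is enabled by -ftree-slp-transpose-vectorize together with -ftree-loop-vectorize. The sums and the shifted differences are the isomorphic statements found from the grouped loads of a[] and b[]; storing their results to temporary arrays is what lets the loop be distributed into a vectorizable producer loop and a consumer loop.

/* Illustrative sketch only (hypothetical names and indexing); based on the
   example in the comment above find_isomorphic_stmts, ignoring casts.  */

#define N 16	/* temp-array insertion requires a constant trip count */

/* Original loop: each iteration combines isomorphic computations fed by
   grouped loads a[8*i + 0..7] and b[8*i + 0..7].  The loop as a whole does
   not vectorize well, so the analysis looks for new seed statements.  */
void
loop_before (int *restrict out, const int *restrict a, const int *restrict b)
{
  for (int i = 0; i < N; i++)
    {
      out[4*i + 0] = (a[8*i + 0] + b[8*i + 0]) + ((a[8*i + 4] - b[8*i + 4]) << 16);
      out[4*i + 1] = (a[8*i + 1] + b[8*i + 1]) + ((a[8*i + 5] - b[8*i + 5]) << 16);
      out[4*i + 2] = (a[8*i + 2] + b[8*i + 2]) + ((a[8*i + 6] - b[8*i + 6]) << 16);
      out[4*i + 3] = (a[8*i + 3] + b[8*i + 3]) + ((a[8*i + 7] - b[8*i + 7]) << 16);
    }
}

/* One plausible distributed form: the isomorphic sums and shifted
   differences become cut points, their results go through temporary
   arrays, and the first loop is now a clean vectorization candidate.  */
void
loop_after (int *restrict out, const int *restrict a, const int *restrict b)
{
  int sum[N][4], dif[N][4];	/* inserted temporary arrays */

  for (int i = 0; i < N; i++)	/* producer loop: isomorphic statements */
    for (int j = 0; j < 4; j++)
      {
	sum[i][j] = a[8*i + j] + b[8*i + j];
	dif[i][j] = (a[8*i + 4 + j] - b[8*i + 4 + j]) << 16;
      }

  for (int i = 0; i < N; i++)	/* consumer loop: remaining combination */
    for (int j = 0; j < 4; j++)
      out[4*i + j] = sum[i][j] + dif[i][j];
}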
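
Whether a set of cut points is actually used is decided by a rough cost model: update_isomer_lhs credits each isomorphic group with (group size - 1) scalar-statement costs, and load_store_profit adds the memory-side estimate under the patch's stated assumption that scalar and vector loads/stores cost the same. Below is a minimal standalone sketch of that arithmetic with made-up unit costs and example counts; the real costs come from builtin_vectorization_cost and are target-specific.

#include <stdio.h>

/* Assumed unit costs; in GCC they come from builtin_vectorization_cost
   and depend on the target.  */
#define SCALAR_LOAD_COST  1
#define SCALAR_STORE_COST 1

/* Same arithmetic as load_store_profit in the patch: replacing
   SCALAR_MEM_OPS scalar loads by SCALAR_MEM_OPS / VF vector loads saves
   the first term; the NEW_MEM_OPS temp-array accesses add NEW_MEM_OPS / VF
   vector stores and as many vector loads back.  */
static int
load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops)
{
  int profit = 0;
  profit += (scalar_mem_ops - scalar_mem_ops / vf) * SCALAR_LOAD_COST;
  profit -= new_mem_ops / vf * SCALAR_LOAD_COST;
  profit -= new_mem_ops / vf * SCALAR_STORE_COST;
  return profit;
}

int
main (void)
{
  /* Hypothetical counts in the spirit of the example at VF = 4: four load
     groups of four lanes (16 scalar loads) and eight cut-point statements
     stored to temporaries: (16 - 4) - 8/4 - 8/4 = 8.  */
  printf ("load/store profit: %d\n", load_store_profit (16, 4, 8));
  return 0;
}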
