diff options
Diffstat (limited to '0287-Add-dynamic-memory-access-checks.patch')
-rw-r--r-- | 0287-Add-dynamic-memory-access-checks.patch | 774 |
1 files changed, 774 insertions, 0 deletions
diff --git a/0287-Add-dynamic-memory-access-checks.patch b/0287-Add-dynamic-memory-access-checks.patch new file mode 100644 index 0000000..e23d8f6 --- /dev/null +++ b/0287-Add-dynamic-memory-access-checks.patch @@ -0,0 +1,774 @@ +From 08fb60d0a0707af4004b20358f4a921e4ae6cca6 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Thu, 22 Aug 2024 15:23:36 +0800 +Subject: [PATCH 156/157] Add dynamic memory access checks + +Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +--- + gcc/ipa-prefetch.cc | 622 +++++++++++++++++++++++++++++++++++++------- + gcc/params.opt | 4 + + 2 files changed, 525 insertions(+), 101 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index 94290ea9c..b000d4d75 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -368,6 +368,7 @@ typedef std::map<memref_t *, tree> memref_tree_map; + typedef std::set<gimple *> stmt_set; + typedef std::set<tree> tree_set; + typedef std::map<tree, tree> tree_map; ++typedef std::map<tree, poly_offset_int> tree_poly_offset_map; + + tree_memref_map *tm_map; + funct_mrs_map *fmrs_map; +@@ -710,6 +711,20 @@ get_mem_ref_address_ssa_name (tree mem, tree base) + return NULL_TREE; + } + ++static void ++dump_base_addr (tree base_addr) ++{ ++ if (base_addr) ++ { ++ fprintf (dump_file, "Base addr (%s): ", ++ get_tree_code_name (TREE_CODE (base_addr))); ++ print_generic_expr (dump_file, base_addr); ++ } ++ else ++ fprintf (dump_file, "Base addr (%s): ", "null"); ++ fprintf (dump_file, "\n"); ++} ++ + static void + analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { +@@ -736,14 +751,7 @@ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { + tree base_addr = get_mem_ref_address_ssa_name (mem, base); + if (dump_file) +- { +- fprintf (dump_file, "Base addr (%s): ", +- base_addr ? get_tree_code_name (TREE_CODE (base_addr)) +- : "null"); +- if (base_addr) +- print_generic_expr (dump_file, base_addr); +- fprintf (dump_file, "\n"); +- } ++ dump_base_addr (base_addr); + if (base_addr) + { + mr->base = analyse_addr_eval (base_addr, mr); +@@ -1187,7 +1195,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec) + } + + static void +-find_nearest_common_dominator (memref_t *mr, basic_block &dom) ++find_nearest_common_post_dominator (memref_t *mr, basic_block &dom) + { + for (unsigned int i = 0; i < mr->stmts.length (); i++) + { +@@ -1196,7 +1204,7 @@ find_nearest_common_dominator (memref_t *mr, basic_block &dom) + if (dom == bb) + continue; + if (dom) +- dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb); ++ dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, bb); + else + dom = bb; + } +@@ -1495,10 +1503,13 @@ gimple_copy_and_remap (gimple *stmt) + + static gimple * + gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, +- int last_idx, stmt_set &processed) ++ int first_idx, int last_idx, ++ stmt_set &processed) + { + gimple *last_stmt = NULL; +- for (int i = mr->stmts.length () - 1; i >= last_idx ; i--) ++ if (first_idx == 0) ++ first_idx = mr->stmts.length () - 1; ++ for (int i = first_idx; i >= last_idx; i--) + { + if (processed.count (mr->stmts[i])) + continue; +@@ -1515,6 +1526,436 @@ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, + return last_stmt; + } + ++/* Check if prefetch insertion may be always unsafe in this case. For now ++ reject cases with access to arrays with no domain or with no elements. */ ++ ++static bool ++check_prefetch_safety (vec<memref_t *> &mrs, memref_t *cmr) ++{ ++ for (unsigned int i = 0; i < mrs.length (); i++) ++ { ++ memref_t *mr = mrs[i]; ++ if (mr == cmr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != ARRAY_REF) ++ continue; ++ tree array = TREE_OPERAND (*mem, 0); ++ tree atype = TREE_TYPE (array); ++ gcc_assert (atype); ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (min_val == 0 && max_val == 0) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type's bounds: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Collect base addresses which we need to check. */ ++ ++static void ++collect_base_addresses (vec<memref_t *> &used_mr_vec, HOST_WIDE_INT dist_val, ++ memref_t *comp_mr, tree_poly_offset_map &offset_map) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Collect base addresses which we need to check.\n"); ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != MEM_REF) ++ continue; ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!base_addr) ++ continue; ++ if (dump_file) ++ { ++ dump_base_addr (base_addr); ++ if (base) ++ { ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ if (!TREE_OPERAND (base, 1)) ++ continue; ++ poly_offset_int curr_offset = mem_ref_offset (base); ++ poly_offset_int saved_offset = 0; ++ if (offset_map.count (base_addr)) ++ { ++ saved_offset = offset_map[base_addr]; ++ if ((dist_val > 0 && known_gt (curr_offset, saved_offset)) ++ || (dist_val < 0 && known_lt (curr_offset, saved_offset))) ++ offset_map[base_addr] = curr_offset; ++ else if (dump_file) ++ fprintf (dump_file, "Off: step=%ld gt=%d lt=%d\n", dist_val, ++ known_gt (curr_offset, saved_offset), ++ known_lt (curr_offset, saved_offset)); ++ } ++ else ++ offset_map[base_addr] = curr_offset; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Final list of base addresses:\n"); ++ for (tree_poly_offset_map::iterator it1 = offset_map.begin (); ++ it1 != offset_map.end (); ++it1) ++ { ++ tree base_addr = it1->first; ++ poly_offset_int off = it1->second; ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base_addr); ++ HOST_WIDE_INT val = estimated_poly_value (off.force_shwi (), ++ POLY_VALUE_LIKELY); ++ fprintf (dump_file, "\nOff: %ld\n", val); ++ } ++ fprintf (dump_file, "Finish collecting base addresses.\n"); ++ } ++} ++ ++/* Return true if we need page check to access memory at this address. */ ++ ++static bool ++need_page_check (tree base_addr, tree_set &checked_base_addrs) ++{ ++ if (dump_file) ++ dump_base_addr (base_addr); ++ if (base_addr == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address not found\n"); ++ return false; ++ } ++ if (checked_base_addrs.count (base_addr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address is already checked\n"); ++ return false; ++ } ++ return true; ++} ++ ++/* Insert instructions to check the original address and newly evaluated ++ adress for prefetch correspond the same page. */ ++ ++static gimple * ++insert_page_check (tree addr, tree_poly_offset_map &offset_map, ++ gimple_seq &stmts) ++{ ++ poly_offset_int offset = 0; ++ if (offset_map.count (addr)) ++ offset = offset_map[addr]; ++ tree addr_type = TREE_TYPE (addr); ++ tree utype = unsigned_type_for (addr_type); ++ tree new_addr = build_int_cst (addr_type, 0); ++ if (decl_map->count (addr)) ++ new_addr = (*decl_map)[addr]; ++ tree t1 = make_ssa_name (utype); ++ tree t2 = make_ssa_name (utype); ++ unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1); ++ tree pmask_cst = build_int_cst (utype, pmask); ++ tree off_tree = wide_int_to_tree (sizetype, offset); ++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE); ++ tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR, ++ addr_type, addr, off_tree); ++ tree conv_addr = make_ssa_name (utype); ++ tree conv_new_addr = make_ssa_name (utype); ++ gimple *conv1 = gimple_build_assign (conv_addr, ++ fold_convert (utype, addr_with_offset)); ++ gimple *conv2 = gimple_build_assign (conv_new_addr, ++ fold_convert (utype, new_addr)); ++ gimple *paddr = gimple_build_assign (t1, BIT_AND_EXPR, ++ conv_addr, pmask_cst); ++ gimple *new_paddr = gimple_build_assign (t2, BIT_AND_EXPR, ++ conv_new_addr, pmask_cst); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t1, t2, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, conv1); ++ gimple_seq_add_stmt (&stmts, paddr); ++ gimple_seq_add_stmt (&stmts, conv2); ++ gimple_seq_add_stmt (&stmts, new_paddr); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Check if this array access needs dynamic address verification. Support only ++ arrays with 1-d indexing. */ ++ ++static bool ++need_array_index_check (tree mem) ++{ ++ /* Check pattern: t1 = (type) t0; ld/st array[t1]. If any index of type (t0) ++ does not go beyond the bounds of the array, we don't need the check. */ ++ tree array = TREE_OPERAND (mem, 0); ++ tree atype = TREE_TYPE (array); ++ tree index = TREE_OPERAND (mem, 1); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Array ind: "); ++ print_generic_expr (dump_file, index); ++ fprintf (dump_file, "\nMem: "); ++ print_generic_expr (dump_file, array); ++ fprintf (dump_file, "\nInd type: "); ++ print_generic_expr (dump_file, TREE_TYPE (index)); ++ fprintf (dump_file, "\nMem type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported array type domain.\n"); ++ return true; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (dump_file) ++ fprintf (dump_file, "Array bounds (%ld, %ld)\n", min_val, max_val); ++ if (TREE_CODE (index) != SSA_NAME) ++ return true; ++ ++ gimple *stmt = SSA_NAME_DEF_STMT (index); ++ if (!is_gimple_assign (stmt)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Is not assign, stop analysis: "); ++ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); ++ } ++ return true; ++ } ++ tree *lhs = gimple_assign_lhs_ptr (stmt); ++ tree *rhs = gimple_assign_rhs1_ptr (stmt); ++ tree lhs_type = TREE_TYPE (*lhs); ++ tree rhs_type = TREE_TYPE (*rhs); ++ tree ind_type = (TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)) ++ ? lhs_type : rhs_type; ++ if (!ind_type || !tree_fits_uhwi_p (TYPE_MIN_VALUE (ind_type)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (ind_type))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported index type.\n"); ++ return true; ++ } ++ int prec = tree_to_uhwi (TYPE_SIZE (ind_type)); ++ unsigned HOST_WIDE_INT t_max_val = tree_to_uhwi (TYPE_MAX_VALUE (ind_type)); ++ unsigned HOST_WIDE_INT t_min_val = tree_to_uhwi (TYPE_MIN_VALUE (ind_type)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Index type (%d, %ld, %ld): ", prec, ++ t_min_val, t_max_val); ++ print_generic_expr (dump_file, ind_type); ++ fprintf (dump_file, "\n"); ++ } ++ return !((t_max_val <= max_val) && (t_min_val >= min_val)); ++} ++ ++/* Insert instructions to check the new index is within the array bounds. */ ++ ++static gimple * ++insert_index_check (tree mem, gimple_seq &stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Insert array index check\n"); ++ tree atype = TREE_TYPE (TREE_OPERAND (mem, 0)); ++ tree ind = TREE_OPERAND (mem, 1); ++ if (decl_map->count (ind)) ++ ind = (*decl_map)[ind]; ++ tree domain = TYPE_DOMAIN (atype); ++ gcc_assert (domain && tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ && tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))); ++ ++ tree ind_min_val = TYPE_MIN_VALUE (domain); ++ tree ind_max_val = TYPE_MAX_VALUE (domain); ++ tree t1 = make_ssa_name (boolean_type_node); ++ tree t2 = make_ssa_name (boolean_type_node); ++ tree t3 = make_ssa_name (boolean_type_node); ++ t1 = fold_build2 (LE_EXPR, boolean_type_node, ind, ind_max_val); ++ t2 = fold_build2 (GE_EXPR, boolean_type_node, ind, ind_min_val); ++ t3 = fold_build2 (TRUTH_ANDIF_EXPR, boolean_type_node, t1, t2); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t3, boolean_true_node, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Insert safety checks for memory access stmts newly created to evaluate ++ prefetch addresses. */ ++ ++static void ++process_used_mr (memref_t *mr, tree_poly_offset_map &offset_map, ++ tree_set &checked_base_addrs, gimple_seq &stmts, ++ vec<gimple *> &bbends) ++{ ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL) ++ return; ++ if (dump_file) ++ { ++ fprintf (dump_file, "MR (%d) maybe need to insert address check: ", ++ mr->mr_id); ++ print_generic_expr (dump_file, *mem); ++ fprintf (dump_file, "\n"); ++ } ++ gimple *bbend = NULL; ++ if (TREE_CODE (*mem) == MEM_REF) ++ { ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!need_page_check (base_addr, checked_base_addrs)) ++ return; ++ bbend = insert_page_check (base_addr, offset_map, stmts); ++ checked_base_addrs.insert (base_addr); ++ } ++ else if (TREE_CODE (*mem) == ARRAY_REF && need_array_index_check (*mem)) ++ bbend = insert_index_check (*mem, stmts); ++ if (bbend) ++ bbends.safe_push (bbend); ++} ++ ++/* Create new variables and insert new stmts to evaluate prefetch addresses. */ ++ ++static void ++create_stmts_for_used_mrs (vec<memref_t *> &used_mr_vec, vec<gimple *> &bbends, ++ gimple_seq &stmts, stmt_set &processed_stmts, ++ HOST_WIDE_INT dist_val, memref_t *comp_mr) ++{ ++ tree_poly_offset_map offset_map; ++ collect_base_addresses (used_mr_vec, dist_val, comp_mr, offset_map); ++ ++ /* Insert stmts to evaluate prefetch addresses. */ ++ tree_set checked_base_addrs; ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr) ++ continue; ++ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, ++ processed_stmts); ++ if (last_stmt && dump_file) ++ { ++ fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); ++ print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); ++ fprintf (dump_file, "\n"); ++ } ++ if (!mr->used_mrs.empty ()) ++ process_used_mr (mr, offset_map, checked_base_addrs, stmts, bbends); ++ last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 0, ++ processed_stmts); ++ } ++} ++ ++/* Insert prefetch instructions. */ ++ ++static void ++insert_prefetch_stmts (vec<gimple *> &pcalls, gimple_seq &stmts, ++ gimple *&last_pref, vec<memref_t *> &vmrs, ++ stmt_set &processed_stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Evaluate addresses and insert prefetch insns.\n"); ++ ++ tree local; ++ switch (param_ipa_prefetch_locality) ++ { ++ case 0: ++ local = integer_zero_node; ++ break; ++ case 1: ++ local = integer_one_node; ++ break; ++ case 2: ++ local = build_int_cst (integer_type_node, 2); ++ break; ++ default: ++ case 3: ++ local = integer_three_node; ++ break; ++ } ++ tree_set prefetched_addrs; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ { ++ memref_t *mr = vmrs[i]; ++ /* Don't need to copy the last stmt, since we insert prefetch insn ++ instead of it. */ ++ gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, processed_stmts); ++ gimple *last_stmt = mr->stmts[0]; ++ gcc_assert (last_stmt); ++ ++ tree old_addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); ++ tree new_addr = old_addr; ++ if (decl_map->count (old_addr)) ++ new_addr = (*decl_map)[old_addr]; ++ if (prefetched_addrs.count (new_addr)) ++ continue; ++ /* Insert prefetch intrinsic call. */ ++ tree write_p = mr->is_store ? integer_one_node : integer_zero_node; ++ last_pref = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), ++ 3, new_addr, write_p, local); ++ pcalls.safe_push (last_pref); ++ gimple_seq_add_stmt (&stmts, last_pref); ++ prefetched_addrs.insert (new_addr); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Insert %d prefetch stmt:\n", i); ++ print_gimple_stmt (dump_file, last_pref, 0); ++ } ++ } ++} ++ ++/* Split bbs after condition stmts and fix control flow graph. */ ++ ++static void ++correct_cfg (vec<gimple *> &bbends, gimple *last_pref, basic_block &dom_bb) ++{ ++ edge e_last = split_block (dom_bb, last_pref); ++ if (!bbends.length () || last_pref == NULL) ++ return; ++ for (int i = bbends.length () - 1; i >= 0; i--) ++ { ++ gimple *bbend = bbends[i]; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Split dom_bb after condition stmts:\n"); ++ print_gimple_stmt (dump_file, bbend, 0); ++ } ++ basic_block last_bb = e_last->dest; ++ edge e = split_block (dom_bb, bbend); ++ e->flags &= ~EDGE_FALLTHRU; ++ e->flags |= EDGE_TRUE_VALUE; ++ edge e_false = make_edge (dom_bb, last_bb, EDGE_FALSE_VALUE); ++ e_false->probability = profile_probability::never (); ++ } ++} ++ + static void + create_cgraph_edge (cgraph_node *n, gimple *stmt) + { +@@ -1529,6 +1970,17 @@ create_cgraph_edge (cgraph_node *n, gimple *stmt) + ipa_call_summaries->get_create (e); + } + ++/* Modify cgraph inserting calls to prefetch intrinsics. */ ++ ++static void ++modify_ipa_info (cgraph_node *n, vec<gimple *> &pcalls) ++{ ++ for (unsigned i = 0; i < pcalls.length (); i++) ++ create_cgraph_edge (n, pcalls[i]); ++ ipa_update_overall_fn_summary (n); ++ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++} ++ + /* Insert prefetch intrinsics in this function, return nonzero on success. */ + + static int +@@ -1607,6 +2059,18 @@ optimize_function (cgraph_node *n, function *fn) + return 0; + } + ++ vec<memref_t *> used_mr_vec = vNULL; ++ for (memref_set::const_iterator it = used_mrs.begin (); ++ it != used_mrs.end (); it++) ++ used_mr_vec.safe_push (*it); ++ used_mr_vec.qsort (memref_id_cmp); ++ if (!check_prefetch_safety (used_mr_vec, comp_mr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Prefetching may be unsafe. Skip the case.\n"); ++ return 0; ++ } ++ + /* Filter out memrefs with the same memory references. + TODO: maybe do the same with used mrs. */ + vec<memref_t *> vmrs = vNULL; +@@ -1616,18 +2080,18 @@ optimize_function (cgraph_node *n, function *fn) + /* TODO: maybe it is useful to process also used_mrs. */ + basic_block dom_bb = NULL; + for (unsigned int i = 0; i < vmrs.length (); i++) +- find_nearest_common_dominator (vmrs[i], dom_bb); ++ find_nearest_common_post_dominator (vmrs[i], dom_bb); + + if (!dom_bb) + { + if (dump_file) +- fprintf (dump_file, "Dominator bb for MRs is not found. " ++ fprintf (dump_file, "Post dominator bb for MRs is not found. " + "Skip the case.\n"); + return 0; + } + else if (dump_file) + { +- fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index); ++ fprintf (dump_file, "Post dominator bb %d for MRs:\n", dom_bb->index); + gimple_dump_bb (dump_file, dom_bb, 0, dump_flags); + fprintf (dump_file, "\n"); + } +@@ -1636,19 +2100,33 @@ optimize_function (cgraph_node *n, function *fn) + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +- if (comp_mr->stmts[0] == gsi_stmt (si)) +- { +- last_used = gsi_stmt (si); +- if (dump_file) ++ { ++ bool found = false; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ /* TODO: take into account only those MRs that should be ++ checked memory. */ ++ if (vmrs[i]->stmts[0] == gsi_stmt (si)) + { +- fprintf (dump_file, "Last used stmt in dominator bb:\n"); +- print_gimple_stmt (dump_file, last_used, 0); ++ found = true; ++ break; + } +- break; +- } ++ if (found || comp_mr->stmts[0] == gsi_stmt (si)) ++ { ++ last_used = gsi_stmt (si); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Last used stmt in post dominator bb:\n"); ++ print_gimple_stmt (dump_file, last_used, 0); ++ } ++ break; ++ } ++ } + +- split_block (dom_bb, last_used); +- gimple_stmt_iterator gsi = gsi_last_bb (dom_bb); ++ gimple_stmt_iterator gsi; ++ if (last_used) ++ gsi = gsi_for_stmt (last_used); ++ else ++ gsi = gsi_last_bb (dom_bb); + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; +@@ -1660,7 +2138,7 @@ optimize_function (cgraph_node *n, function *fn) + stmt_set processed_stmts; + if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0]))) + { +- gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, ++ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0, + processed_stmts); + inc_var = gimple_assign_lhs (tmp); + } +@@ -1683,86 +2161,26 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + +- /* Create other new vars. Insert new stmts. */ +- vec<memref_t *> used_mr_vec = vNULL; +- for (memref_set::const_iterator it = used_mrs.begin (); +- it != used_mrs.end (); it++) +- used_mr_vec.safe_push (*it); +- used_mr_vec.qsort (memref_id_cmp); +- +- for (unsigned int j = 0; j < used_mr_vec.length (); j++) +- { +- memref_t *mr = used_mr_vec[j]; +- if (mr == comp_mr) +- continue; +- gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, +- processed_stmts); +- gcc_assert (last_stmt); +- if (dump_file) +- { +- fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); +- print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); +- fprintf (dump_file, "\n"); +- } +- } +- /* On new load check page fault. */ +- /* Insert prefetch instructions. */ +- if (dump_file) +- fprintf (dump_file, "Evaluate addresses and insert prefetch insn.\n"); ++ vec<gimple *> bbends = vNULL; ++ create_stmts_for_used_mrs (used_mr_vec, bbends, stmts, processed_stmts, ++ dist_val, comp_mr); + + vec<gimple *> pcalls = vNULL; +- tree local; +- switch (param_ipa_prefetch_locality) +- { +- case 0: +- local = integer_zero_node; +- break; +- case 1: +- local = integer_one_node; +- break; +- case 2: +- local = build_int_cst (integer_type_node, 2); +- break; +- default: +- case 3: +- local = integer_three_node; +- break; +- } +- tree_set prefetched_addrs; +- for (unsigned int j = 0; j < vmrs.length (); j++) +- { +- memref_t *mr = vmrs[j]; +- /* Don't need to copy the last stmt, since we insert prefetch insn +- instead of it. */ +- gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts); +- gimple *last_stmt = mr->stmts[0]; +- gcc_assert (last_stmt); +- tree write_p = mr->is_store ? integer_one_node : integer_zero_node; +- tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); +- if (decl_map->count (addr)) +- addr = (*decl_map)[addr]; +- if (prefetched_addrs.count (addr)) +- continue; +- last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), +- 3, addr, write_p, local); +- pcalls.safe_push (last_stmt); +- gimple_seq_add_stmt (&stmts, last_stmt); +- prefetched_addrs.insert (addr); +- if (dump_file) +- { +- fprintf (dump_file, "Insert %d prefetch stmt:\n", j); +- print_gimple_stmt (dump_file, last_stmt, 0); +- } +- } +- ++ gimple *last_pref = NULL; ++ insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts); + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ correct_cfg (bbends, last_pref, dom_bb); ++ + delete decl_map; + +- /* Modify cgraph inserting calls to prefetch intrinsics. */ +- for (unsigned i = 0; i < pcalls.length (); i++) +- create_cgraph_edge (n, pcalls[i]); +- ipa_update_overall_fn_summary (n); +- renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++ modify_ipa_info (n, pcalls); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "After optimization:\n"); ++ dump_function_to_file (cfun->decl, dump_file, (dump_flags_t)0); ++ } + + return 1; + } +@@ -1781,8 +2199,10 @@ insert_prefetch () + fprintf (dump_file, "Optimize function %s\n", n->dump_name ()); + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); ++ calculate_dominance_info (CDI_POST_DOMINATORS); + res |= optimize_function (n, fn); + free_dominance_info (CDI_DOMINATORS); ++ free_dominance_info (CDI_POST_DOMINATORS); + pop_cfun (); + } + return res; +diff --git a/gcc/params.opt b/gcc/params.opt +index 747d0f829..fc700ab79 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -317,6 +317,10 @@ The factor represents the number of inductive variable incrementations to evalua + Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization + The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data. + ++-param=ipa-prefetch-pagesize= ++Common Joined UInteger Var(param_ipa_prefetch_pagesize) Init(4096) Param Optimization ++The flag represents current pagesize for runtime checks of memory access addresses. ++ + -param=ira-loop-reserved-regs= + Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization + The number of registers in each class kept unused by loop invariant motion. +-- +2.33.0 + |