diff options
author | CoprDistGit <infra@openeuler.org> | 2023-10-17 02:15:03 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-10-17 02:15:03 +0000 |
commit | d82826d1a1c7ea45a761dfbf76b879712c7332ec (patch) | |
tree | 973a28470803b27c914f813f43d43f8932763ea3 /0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch | |
parent | b868000cf68cec0c9cd45fbf89a83173dea7c5eb (diff) |
automatic import of gccopeneuler22.03_LTS
Diffstat (limited to '0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch')
-rw-r--r-- | 0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch new file mode 100644 index 0000000..c004eac --- /dev/null +++ b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch @@ -0,0 +1,267 @@ +From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji <linda7@huawei.com> +Date: Mon, 8 Aug 2022 09:13:53 +0800 +Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between + loops For vectorized stores in loop, if all succeed loops immediately use the + data, transfer data using registers instead of load store to prevent overhead + from memory access. + +--- + gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++ + gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++ + 2 files changed, 226 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c +new file mode 100644 +index 000000000..d8b29fbd5 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++static unsigned inline abs2 (unsigned a) /* Sign-mask add/xor applied to both 16-bit halves of A; looks like a packed absolute value -- TODO confirm. */
++{
++ unsigned s = ((a>>15)&0x10001)*0xffff; /* 0xffff in each 16-bit half of S whose top bit (bit 15 or bit 31 of A) is set, 0 otherwise. */
++ return (a+s)^s; /* Add the per-half mask, then xor with it. */
++}
++
++int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33) /* Vectorizer test input: two 4-iteration loops that communicate through the local array tmp. */
++{
++ unsigned tmp[4][4]; /* Written one row per iteration by the first loop, read one column per iteration by the second. */
++ unsigned a0, a1, a2, a3;
++ int sum = 0;
++ for (int i = 0; i < 4; i++) /* Pass 1: sums and differences of element I of the four inputs go into tmp[i][0..3]. */
++ {
++ int t0 = a00[i] + a11[i];
++ int t1 = a00[i] - a11[i];
++ int t2 = a22[i] + a33[i];
++ int t3 = a22[i] - a33[i];
++ tmp[i][0] = t0 + t2;
++ tmp[i][2] = t0 - t2;
++ tmp[i][1] = t1 + t3;
++ tmp[i][3] = t1 - t3;
++ }
++ for (int i = 0; i < 4; i++) /* Pass 2: the same sum/difference pattern over column I of tmp, folded into SUM through abs2. */
++ {
++ int t0 = tmp[0][i] + tmp[1][i];
++ int t1 = tmp[0][i] - tmp[1][i];
++ int t2 = tmp[2][i] + tmp[3][i];
++ int t3 = tmp[2][i] - tmp[3][i];
++ a0 = t0 + t2;
++ a2 = t0 - t2;
++ a1 = t1 + t3;
++ a3 = t1 - t3;
++ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
++ }
++ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1; /* Add the low and high 16-bit halves of SUM, then halve. */
++}
++
++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
+diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 2c2197022..98b233718 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) + return NULL_TREE; + } + ++/* Count the non-volatile MEM_REF/ARRAY_REF accesses in BB and push the loads onto ++ STMTS. A BB with a zero count is treated as empty and skipped by the DFS. */ ++ ++static unsigned ++mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts) ++{ ++ unsigned num = 0; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ continue; ++ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt) ++ && !gimple_has_volatile_ops (stmt)) ++ { ++ if (gimple_assign_rhs_code (stmt) == MEM_REF ++ || gimple_assign_rhs_code (stmt) == ARRAY_REF) ++ { ++ stmts.safe_push (stmt); ++ num++; ++ } ++ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF ++ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF) ++ num++; ++ } ++ } ++ return num; ++} ++ ++static bool ++check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr) ++{ /* Return true if DATAREFS holds a read whose base is the same decl or SSA name as the base of DR. */ ++ for (unsigned ui = 0; ui < datarefs->length (); ui++) ++ { ++ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0); ++ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0); ++ if (TREE_CODE (op1) != TREE_CODE (op2)) ++ continue; ++ if (TREE_CODE (op1) == ADDR_EXPR) ++ { ++ op1 = TREE_OPERAND (op1, 0); ++ op2 = TREE_OPERAND (op2, 0); ++ } ++ enum tree_code code = TREE_CODE (op1); ++ switch (code) ++ { ++ case VAR_DECL: /* Compare the decls themselves: DECL_NAME is shared by same-named variables and is NULL for unnamed ones. */ ++ if (op1 == op2 ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ case SSA_NAME: ++ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ default: ++ break; ++ } ++ } ++ return false; ++} ++ ++/* Walk the load STMTS. Return with SUCCESS untouched as soon as a vector load in a loop header ++ reads from the same base that STMT_INFO stores to; otherwise set SUCCESS to false. 
*/ ++ ++static void ++check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts, ++ stmt_vec_info stmt_info, bool &success) ++{ ++ if (stmt_info == NULL) ++ { ++ success = false; ++ return; ++ } ++ if (DR_IS_READ (stmt_info->dr_aux.dr)) ++ { ++ success = false; ++ return; ++ } ++ unsigned ui = 0; ++ gimple *candidate = NULL; ++ FOR_EACH_VEC_ELT (stmts, ui, candidate) ++ { ++ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE) ++ continue; ++ ++ if (candidate->bb != candidate->bb->loop_father->header) ++ { ++ success = false; ++ return; ++ } ++ auto_vec<data_reference_p> datarefs; ++ tree res = find_data_references_in_bb (candidate->bb->loop_father, ++ candidate->bb, &datarefs); ++ if (res == chrec_dont_know) ++ { ++ success = false; ++ return; ++ } ++ if (check_same_base (&datarefs, stmt_info->dr_aux.dr)) ++ return; ++ } ++ success = false; ++} ++ ++/* Depth-first search starting at BB. The first BB on a path that contains memory references ++ is handed to check_vec_use and ends the search along that path. */ ++ ++static void ++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info, ++ bool &success, vec<basic_block> &visited_bbs) ++{ ++ if (bb == cfun->cfg->x_exit_block_ptr) ++ { ++ success = false; ++ return; ++ } ++ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch) ++ return; ++ ++ visited_bbs.safe_push (bb); ++ auto_vec<gimple *> stmts; ++ unsigned num = mem_refs_in_bb (bb, stmts); ++ /* Empty BB: no memory references, so keep searching through its successors. */ ++ if (num == 0) ++ { ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs); ++ if (!success) ++ return; ++ } ++ return; ++ } ++ /* Non-empty BB: its loads decide the result for this path. */ ++ check_vec_use (loop_vinfo, stmts, stmt_info, success); ++} ++ ++/* For a grouped store, return true if the search over the successors of the store's BB ++ never clears SUCCESS, i.e. each searched path first reaches a vectorized load from the ++ store's base. The caller then uses VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. 
*/ ++ ++static bool ++conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo) ++{ ++ gimple *stmt = stmt_vinfo->stmt; ++ if (gimple_code (stmt) != GIMPLE_ASSIGN) ++ return false; ++ ++ if (DR_IS_READ (stmt_vinfo->dr_aux.dr)) ++ return false; ++ ++ basic_block bb = stmt->bb; ++ bool success = true; ++ auto_vec<basic_block> visited_bbs; ++ visited_bbs.safe_push (bb); ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs); ++ return success; ++} ++ + /* A subroutine of get_load_store_type, with a subset of the same + arguments. Handle the case where STMT_INFO is part of a grouped load + or store. +@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, + *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; + overrun_p = would_overrun_p; + } ++ ++ if (*memory_access_type == VMAT_LOAD_STORE_LANES ++ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST ++ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters), ++ loop_vinfo->vectorization_factor) ++ && conti_perm (stmt_info, loop_vinfo) ++ && (vls_type == VLS_LOAD ++ ? vect_grouped_load_supported (vectype, single_element_p, ++ group_size) ++ : vect_grouped_store_supported (vectype, group_size))) ++ { ++ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; ++ overrun_p = would_overrun_p; ++ } + } + + /* As a last resort, trying using a gather load or scatter store. +-- +2.27.0.windows.1 + |