summaryrefslogtreecommitdiff
path: root/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-10-17 02:15:03 +0000
committerCoprDistGit <infra@openeuler.org>2023-10-17 02:15:03 +0000
commitd82826d1a1c7ea45a761dfbf76b879712c7332ec (patch)
tree973a28470803b27c914f813f43d43f8932763ea3 /0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
parentb868000cf68cec0c9cd45fbf89a83173dea7c5eb (diff)
automatic import of gccopeneuler22.03_LTS
Diffstat (limited to '0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch')
-rw-r--r--0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch267
1 files changed, 267 insertions, 0 deletions
diff --git a/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
new file mode 100644
index 0000000..c004eac
--- /dev/null
+++ b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
@@ -0,0 +1,267 @@
+From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
+From: benniaobufeijiushiji <linda7@huawei.com>
+Date: Mon, 8 Aug 2022 09:13:53 +0800
+Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
+ loops For vectorized stores in loop, if all succeeding loops immediately use the
+ data, transfer data using registers instead of load store to prevent overhead
+ from memory access.
+
+---
+ gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
+ gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
+ 2 files changed, 226 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
+
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
+new file mode 100644
+index 000000000..d8b29fbd5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
+@@ -0,0 +1,45 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++static unsigned inline abs2 (unsigned a)
++{
++ unsigned s = ((a>>15)&0x10001)*0xffff;
++ return (a+s)^s;
++}
++
++int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
++{
++ unsigned tmp[4][4];
++ unsigned a0, a1, a2, a3;
++ int sum = 0;
++ for (int i = 0; i < 4; i++)
++ {
++ int t0 = a00[i] + a11[i];
++ int t1 = a00[i] - a11[i];
++ int t2 = a22[i] + a33[i];
++ int t3 = a22[i] - a33[i];
++ tmp[i][0] = t0 + t2;
++ tmp[i][2] = t0 - t2;
++ tmp[i][1] = t1 + t3;
++ tmp[i][3] = t1 - t3;
++ }
++ for (int i = 0; i < 4; i++)
++ {
++ int t0 = tmp[0][i] + tmp[1][i];
++ int t1 = tmp[0][i] - tmp[1][i];
++ int t2 = tmp[2][i] + tmp[3][i];
++ int t3 = tmp[2][i] - tmp[3][i];
++ a0 = t0 + t2;
++ a2 = t0 - t2;
++ a1 = t1 + t3;
++ a3 = t1 - t3;
++ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
++ }
++ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
++}
++
++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
+diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
+index 2c2197022..98b233718 100644
+--- a/gcc/tree-vect-stmts.c
++++ b/gcc/tree-vect-stmts.c
+@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+ return NULL_TREE;
+ }
+
+/* Check successor BB; a BB without loads is regarded as an empty BB.  Ignore
+   empty BBs in the DFS. */
++
++static unsigned
++mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
++{
++ unsigned num = 0;
++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
++ !gsi_end_p (gsi); gsi_next (&gsi))
++ {
++ gimple *stmt = gsi_stmt (gsi);
++ if (is_gimple_debug (stmt))
++ continue;
++ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
++ && !gimple_has_volatile_ops (stmt))
++ {
++ if (gimple_assign_rhs_code (stmt) == MEM_REF
++ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
++ {
++ stmts.safe_push (stmt);
++ num++;
++ }
++ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
++ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
++ num++;
++ }
++ }
++ return num;
++}
++
++static bool
++check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
++{
++ for (unsigned ui = 0; ui < datarefs->length (); ui++)
++ {
++ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
++ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
++ if (TREE_CODE (op1) != TREE_CODE (op2))
++ continue;
++ if (TREE_CODE (op1) == ADDR_EXPR)
++ {
++ op1 = TREE_OPERAND (op1, 0);
++ op2 = TREE_OPERAND (op2, 0);
++ }
++ enum tree_code code = TREE_CODE (op1);
++ switch (code)
++ {
++ case VAR_DECL:
++ if (DECL_NAME (op1) == DECL_NAME (op2)
++ && DR_IS_READ ((*datarefs)[ui]))
++ return true;
++ break;
++ case SSA_NAME:
++ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
++ && DR_IS_READ ((*datarefs)[ui]))
++ return true;
++ break;
++ default:
++ break;
++ }
++ }
++ return false;
++}
++
+/* Iterate over all load STMTS; if one satisfies the same-base vectorized-stmt
+   check, return.  Otherwise, set SUCCESS to false. */
++
++static void
++check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
++ stmt_vec_info stmt_info, bool &success)
++{
++ if (stmt_info == NULL)
++ {
++ success = false;
++ return;
++ }
++ if (DR_IS_READ (stmt_info->dr_aux.dr))
++ {
++ success = false;
++ return;
++ }
++ unsigned ui = 0;
++ gimple *candidate = NULL;
++ FOR_EACH_VEC_ELT (stmts, ui, candidate)
++ {
++ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
++ continue;
++
++ if (candidate->bb != candidate->bb->loop_father->header)
++ {
++ success = false;
++ return;
++ }
++ auto_vec<data_reference_p> datarefs;
++ tree res = find_data_references_in_bb (candidate->bb->loop_father,
++ candidate->bb, &datarefs);
++ if (res == chrec_dont_know)
++ {
++ success = false;
++ return;
++ }
++ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
++ return;
++ }
++ success = false;
++}
++
+/* Depth-first search from the present BB.  If a successor has load STMTS,
+   stop searching further. */
++
++static void
++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
++ bool &success, vec<basic_block> &visited_bbs)
++{
++ if (bb == cfun->cfg->x_exit_block_ptr)
++ {
++ success = false;
++ return;
++ }
++ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
++ return;
++
++ visited_bbs.safe_push (bb);
++ auto_vec<gimple *> stmts;
++ unsigned num = mem_refs_in_bb (bb, stmts);
++ /* Empty BB. */
++ if (num == 0)
++ {
++ edge e;
++ edge_iterator ei;
++ FOR_EACH_EDGE (e, ei, bb->succs)
++ {
++ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
++ if (!success)
++ return;
++ }
++ return;
++ }
++ /* Non-empty BB. */
++ check_vec_use (loop_vinfo, stmts, stmt_info, success);
++}
++
+/* For a grouped store, check whether all successors of the present BB have a
+   vectorized load from the same base as the store.  If so, set
+   memory_access_type to VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
++
++static bool
++conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
++{
++ gimple *stmt = stmt_vinfo->stmt;
++ if (gimple_code (stmt) != GIMPLE_ASSIGN)
++ return false;
++
++ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
++ return false;
++
++ basic_block bb = stmt->bb;
++ bool success = true;
++ auto_vec<basic_block> visited_bbs;
++ visited_bbs.safe_push (bb);
++ edge e;
++ edge_iterator ei;
++ FOR_EACH_EDGE (e, ei, bb->succs)
++ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
++ return success;
++}
++
+ /* A subroutine of get_load_store_type, with a subset of the same
+ arguments. Handle the case where STMT_INFO is part of a grouped load
+ or store.
+@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ overrun_p = would_overrun_p;
+ }
++
++ if (*memory_access_type == VMAT_LOAD_STORE_LANES
++ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
++ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
++ loop_vinfo->vectorization_factor)
++ && conti_perm (stmt_info, loop_vinfo)
++ && (vls_type == VLS_LOAD
++ ? vect_grouped_load_supported (vectype, single_element_p,
++ group_size)
++ : vect_grouped_store_supported (vectype, group_size)))
++ {
++ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
++ overrun_p = would_overrun_p;
++ }
+ }
+
+ /* As a last resort, trying using a gather load or scatter store.
+--
+2.27.0.windows.1
+