summaryrefslogtreecommitdiff
path: root/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
committerCoprDistGit <infra@openeuler.org>2025-02-28 10:03:49 +0000
commit73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree8e28b63e478c43c252f18b49836dff7313affe54 /0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
parent49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch')
-rw-r--r--0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch75
1 files changed, 75 insertions, 0 deletions
diff --git a/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch b/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
new file mode 100644
index 0000000..16a75f8
--- /dev/null
+++ b/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
@@ -0,0 +1,75 @@
+From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 10 Nov 2023 12:39:11 +0100
+Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and
+ loop mask/len
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
+
+The following fixes the issue that when SLP stmts are internal defs
+but appear invariant because they end up only using invariant defs
+then they get scheduled outside of the loop. This nice optimization
+breaks down when loop masks or lens are applied since those are not
+explicitly tracked as dependences. The following makes sure to never
+schedule internal defs outside of the vectorized loop when the
+loop uses masks/lens.
+
+ PR tree-optimization/110221
+ * tree-vect-slp.cc (vect_schedule_slp_node): When loop
+ masking / len is applied make sure to not schedule
+ internal defs outside of the loop.
+
+ * gfortran.dg/pr110221.f: New testcase.
+---
+ gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
+ gcc/tree-vect-slp.cc | 10 ++++++++++
+ 2 files changed, 27 insertions(+)
+ create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
+
+diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
+new file mode 100644
+index 000000000..8b5738431
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/pr110221.f
+@@ -0,0 +1,17 @@
++C PR tree-optimization/110221
++C { dg-do compile }
++C { dg-options "-O2 -w" }
++C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
++ SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
++ IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
++ IMPLICIT COMPLEX*16 (C,Z)
++ DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
++ N=INT(V)
++ CALL GAMMA2(VG,GA)
++ DO 65 K=1,N
++ CBY(K)=CYY
++65 CONTINUE
++ CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
++ DO 70 K=1,N
++70 CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
++ END
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d02f0ce37..e3e246977 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo,
+ /* Emit other stmts after the children vectorized defs which is
+ earliest possible. */
+ gimple *last_stmt = NULL;
++ if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
++ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
++ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
++ {
++ /* But avoid scheduling internal defs outside of the loop when
++ we might have only implicitly tracked loop mask/len defs. */
++ gimple_stmt_iterator si
++ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
++ last_stmt = *si;
++ }
+ bool seen_vector_def = false;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+--
+2.33.0
+