summaryrefslogtreecommitdiff
path: root/0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
diff options
context:
space:
mode:
Diffstat (limited to '0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch')
-rw-r--r--0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch111
1 files changed, 111 insertions, 0 deletions
diff --git a/0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch b/0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
new file mode 100644
index 0000000..2918d3e
--- /dev/null
+++ b/0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
@@ -0,0 +1,111 @@
+From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Tue, 20 Jun 2023 15:41:00 +0800
+Subject: [PATCH 05/28] Refine maskloadmn pattern with UNSPEC_MASKLOAD.
+
+If mem_addr points to a memory region with fewer than a whole vector's
+worth of accessible bytes, and k is a mask that would prevent reading
+the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
+the load from being transformed into vpblendd.
+
+gcc/ChangeLog:
+
+ PR target/110309
+ * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
+ Refine pattern with UNSPEC_MASKLOAD.
+ (maskload<mode><avx512fmaskmodelower>): Ditto.
+ (*<avx512>_load<mode>_mask): Extend mode iterator to
+ VI12HF_AVX512VL.
+ (*<avx512>_load<mode>): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/pr110309.c: New test.
+---
+ gcc/config/i386/sse.md | 32 +++++++++++++-----------
+ gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
+ 2 files changed, 28 insertions(+), 14 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c
+
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index eb767e56c..b30e96cb1 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -1411,12 +1411,12 @@
+ })
+
+ (define_insn "*<avx512>_load<mode>_mask"
+- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+- (vec_merge:VI12_AVX512VL
+- (unspec:VI12_AVX512VL
+- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
++ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
++ (vec_merge:VI12HF_AVX512VL
++ (unspec:VI12HF_AVX512VL
++ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
++ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+ "TARGET_AVX512BW"
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+@@ -1425,9 +1425,9 @@
+ (set_attr "mode" "<sseinsnmode>")])
+
+ (define_insn_and_split "*<avx512>_load<mode>"
+- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+- (unspec:VI12_AVX512VL
+- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
++ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
++ (unspec:VI12HF_AVX512VL
++ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX512BW"
+ "#"
+@@ -25973,17 +25973,21 @@
+ "TARGET_AVX")
+
+ (define_expand "maskload<mode><avx512fmaskmodelower>"
+- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
+- (vec_merge:V48H_AVX512VL
+- (match_operand:V48H_AVX512VL 1 "memory_operand")
++ [(set (match_operand:V48_AVX512VL 0 "register_operand")
++ (vec_merge:V48_AVX512VL
++ (unspec:V48_AVX512VL
++ [(match_operand:V48_AVX512VL 1 "memory_operand")]
++ UNSPEC_MASKLOAD)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ "TARGET_AVX512F")
+
+ (define_expand "maskload<mode><avx512fmaskmodelower>"
+- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+- (vec_merge:VI12_AVX512VL
+- (match_operand:VI12_AVX512VL 1 "memory_operand")
++ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
++ (vec_merge:VI12HF_AVX512VL
++ (unspec:VI12HF_AVX512VL
++ [(match_operand:VI12HF_AVX512VL 1 "memory_operand")]
++ UNSPEC_MASKLOAD)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ "TARGET_AVX512BW")
+diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
+new file mode 100644
+index 000000000..f6e9e9c3c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr110309.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
++/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
++
++
++void foo (int * __restrict a, int *b)
++{
++ for (int i = 0; i < 6; ++i)
++ a[i] = b[i] + 42;
++}
+--
+2.31.1
+