diff options
Diffstat (limited to '0094-Backport-Fix-zero-masking-for-vcvtps2ph-when-dest-op.patch')
-rw-r--r-- | 0094-Backport-Fix-zero-masking-for-vcvtps2ph-when-dest-op.patch | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/0094-Backport-Fix-zero-masking-for-vcvtps2ph-when-dest-op.patch b/0094-Backport-Fix-zero-masking-for-vcvtps2ph-when-dest-op.patch new file mode 100644 index 0000000..10dc835 --- /dev/null +++ b/0094-Backport-Fix-zero-masking-for-vcvtps2ph-when-dest-op.patch @@ -0,0 +1,172 @@ +From 5b473317f6b1890238f1778d0fdebf8ed09292d9 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 29 May 2020 13:38:49 +0800 +Subject: [PATCH 3/5] [Backport] Fix zero-masking for vcvtps2ph when dest + operand is memory. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=43088bb4dadd3d14b6b594c5f9363fe879f3d7f7 + +When dest is memory, zero-masking is not valid; only merging-masking is available. + +2020-06-24 Hongtao Liu <hongtao.liu@intel.com> + +gcc/ChangeLog: + PR target/95254 + * config/i386/sse.md (*vcvtps2ph_store<merge_mask_name>): + Refine from *vcvtps2ph_store<mask_name>. + (vcvtps2ph256<mask_name>): Refine constraint from vm to v. + (<mask_codefor>avx512f_vcvtps2ph512<mask_name>): Ditto. + (*vcvtps2ph256<merge_mask_name>): New define_insn. + (*avx512f_vcvtps2ph512<merge_mask_name>): Ditto. + * config/i386/subst.md (merge_mask): New define_subst. + (merge_mask_name): New define_subst_attr. + (merge_mask_operand3): Ditto. + +gcc/testsuite/ChangeLog: + * gcc.target/i386/avx512f-vcvtps2ph-pr95254.c: New test. + * gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c: Ditto. 
+--- + gcc/config/i386/sse.md | 32 ++++++++++++++++--- + gcc/config/i386/subst.md | 12 +++++++ + .../i386/avx512f-vcvtps2ph-pr95254.c | 12 +++++++ + .../i386/avx512vl-vcvtps2ph-pr95254.c | 18 +++++++++++ + 4 files changed, 70 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c + create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index bf01e1d74..915b8e3d2 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -21354,19 +21354,19 @@ + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "V4SF")]) + +-(define_insn "*vcvtps2ph_store<mask_name>" ++(define_insn "*vcvtps2ph_store<merge_mask_name>" + [(set (match_operand:V4HI 0 "memory_operand" "=m") + (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] + "TARGET_F16C || TARGET_AVX512VL" +- "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" ++ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "V4SF")]) + + (define_insn "vcvtps2ph256<mask_name>" +- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm") ++ [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] +@@ -21377,8 +21377,20 @@ + (set_attr "btver2_decode" "vector") + (set_attr "mode" "V8SF")]) + ++(define_insn "*vcvtps2ph256<merge_mask_name>" ++ [(set (match_operand:V8HI 0 "memory_operand" "=m") ++ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") ++ (match_operand:SI 2 "const_0_to_255_operand" "N")] ++ UNSPEC_VCVTPS2PH))] ++ "TARGET_F16C || TARGET_AVX512VL" ++ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" ++ [(set_attr 
"type" "ssecvt") ++ (set_attr "prefix" "maybe_evex") ++ (set_attr "btver2_decode" "vector") ++ (set_attr "mode" "V8SF")]) ++ + (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>" +- [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") ++ [(set (match_operand:V16HI 0 "register_operand" "=v") + (unspec:V16HI + [(match_operand:V16SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] +@@ -21389,6 +21401,18 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "V16SF")]) + ++(define_insn "*avx512f_vcvtps2ph512<merge_mask_name>" ++ [(set (match_operand:V16HI 0 "memory_operand" "=m") ++ (unspec:V16HI ++ [(match_operand:V16SF 1 "register_operand" "v") ++ (match_operand:SI 2 "const_0_to_255_operand" "N")] ++ UNSPEC_VCVTPS2PH))] ++ "TARGET_AVX512F" ++ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "V16SF")]) ++ + ;; For gather* insn patterns + (define_mode_iterator VEC_GATHER_MODE + [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) +diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md +index 4a1c9b080..27eb3430d 100644 +--- a/gcc/config/i386/subst.md ++++ b/gcc/config/i386/subst.md +@@ -75,6 +75,18 @@ + (match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]) + ++(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask") ++(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}") ++(define_subst "merge_mask" ++ [(set (match_operand:SUBST_V 0) ++ (match_operand:SUBST_V 1))] ++ "TARGET_AVX512F" ++ [(set (match_dup 0) ++ (vec_merge:SUBST_V ++ (match_dup 1) ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]) ++ + (define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask") + (define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}") + (define_subst_attr 
"mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}") +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c +new file mode 100644 +index 000000000..9e0da9473 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512f" } */ ++ ++#include<immintrin.h> ++extern __m256i res; ++void ++foo (__m512 a, __mmask16 m) ++{ ++ res = _mm512_maskz_cvtps_ph (m, a, 10); ++} ++ ++/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c +new file mode 100644 +index 000000000..0c685ea66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512vl -mavx512f" } */ ++ ++#include<immintrin.h> ++extern __m128i res; ++void ++foo (__m256 a, __mmask8 m) ++{ ++ res = _mm256_maskz_cvtps_ph (m, a, 10); ++} ++ ++void ++foo1 (__m128 a, __mmask8 m) ++{ ++ res = _mm_maskz_cvtps_ph (m, a, 10); ++} ++ ++/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ +-- +2.33.0 + |