From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 26 Jun 2023 21:07:09 +0800 Subject: [PATCH 06/28] Refine maskstore patterns with UNSPEC_MASKMOV. Similar to r14-2070-gc79476da46728e. If mem_addr points to a memory region with fewer than a whole vector's worth of accessible bytes and k is a mask that would prevent reading the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent the masked store from being transformed into any other whole-memory-access instruction. gcc/ChangeLog: PR rtl-optimization/110237 * config/i386/sse.md (_store_mask): Refine with UNSPEC_MASKMOV. (maskstore_store_mask): New define_insn, renamed from the original _store_mask. --- gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b30e96cb1..3af159896 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1554,7 +1554,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "_store_mask" +(define_insn "*_store_mask" [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") (vec_merge:V48_AVX512VL (match_operand:V48_AVX512VL 1 "register_operand" "v") @@ -1582,7 +1582,7 @@ (set_attr "memory" "store") (set_attr "mode" "")]) -(define_insn "_store_mask" +(define_insn "*_store_mask" [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") (vec_merge:VI12HF_AVX512VL (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") @@ -26002,21 +26002,66 @@ "TARGET_AVX") (define_expand "maskstore" - [(set (match_operand:V48H_AVX512VL 0 "memory_operand") - (vec_merge:V48H_AVX512VL - (match_operand:V48H_AVX512VL 1 "register_operand") - (match_dup 0) - (match_operand: 2 "register_operand")))] + [(set (match_operand:V48_AVX512VL 0 "memory_operand") + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand: 2 "register_operand")] + UNSPEC_MASKMOV))] 
"TARGET_AVX512F") (define_expand "maskstore" - [(set (match_operand:VI12_AVX512VL 0 "memory_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "register_operand") - (match_dup 0) - (match_operand: 2 "register_operand")))] + [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand") + (unspec:VI12HF_AVX512VL + [(match_operand:VI12HF_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand: 2 "register_operand")] + UNSPEC_MASKMOV))] "TARGET_AVX512BW") +(define_insn "_store_mask" + [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "register_operand" "v") + (match_dup 0) + (match_operand: 2 "register_operand" "Yk")] + UNSPEC_MASKMOV))] + "TARGET_AVX512F" +{ + if (FLOAT_MODE_P (GET_MODE_INNER (mode))) + { + if (misaligned_operand (operands[0], mode)) + return "vmovu\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + else + return "vmova\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } + else + { + if (misaligned_operand (operands[0], mode)) + return "vmovdqu\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + else + return "vmovdqa\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "")]) + +(define_insn "_store_mask" + [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") + (unspec:VI12HF_AVX512VL + [(match_operand:VI12HF_AVX512VL 1 "register_operand" "v") + (match_dup 0) + (match_operand: 2 "register_operand" "Yk")] + UNSPEC_MASKMOV))] + "TARGET_AVX512BW" + "vmovdqu\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "")]) + (define_expand "cbranch4" [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:VI48_AVX 1 "register_operand") -- 2.31.1