author     CoprDistGit <infra@openeuler.org>    2024-08-18 17:52:37 +0000
committer  CoprDistGit <infra@openeuler.org>    2024-08-18 17:52:37 +0000
commit     fc3df9d7d0ac60faa3bd55068b68a1711f467f3f (patch)
tree       dbab988e007f32829af4ace1f726c0f0c6c25b4b /0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
parent     c017c1889f2a79f52676011db04bcbf5aba4e177 (diff)
automatic import of llvmopeneuler23.09
Diffstat (limited to '0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch')
-rw-r--r--   0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch   74
1 file changed, 74 insertions, 0 deletions
diff --git a/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch b/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
new file mode 100644
index 0000000..c43bbd3
--- /dev/null
+++ b/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
@@ -0,0 +1,74 @@
+From 678cf3a36644847cac4b0be2d919aba77416088a Mon Sep 17 00:00:00 2001
+From: Nikita Popov <npopov@redhat.com>
+Date: Mon, 04 Mar 2024 07:00:37 +0800
+Subject: [PATCH] [Backport][X86][Inline] Skip inline asm in inlining target
+ feature check
+
+When inlining across functions with different target features, we
+perform roughly two checks:
+1. The caller features must be a superset of the callee features.
+2. Calls in the callee cannot use types where the target features would
+change the call ABI (e.g. by changing whether something is passed in a
+zmm or two ymm registers). The latter check is very crude right now.
+
+The latter check currently also catches inline asm "calls". I believe
+that inline asm should be excluded from this check, as it is independent
+from the usual call ABI, and instead governed by the inline asm
+constraint string.
+---
+ .../lib/Target/X86/X86TargetTransformInfo.cpp |  4 +++
+ .../Inline/X86/call-abi-compatibility.ll      | 26 +++++++++++++++++++
+ 2 files changed, 30 insertions(+)
+
+diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+index 129a2646d..9c7954230 100644
+--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
++++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+@@ -6046,6 +6046,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
+ 
+   for (const Instruction &I : instructions(Callee)) {
+     if (const auto *CB = dyn_cast<CallBase>(&I)) {
++      // Having more target features is fine for inline ASM.
++      if (CB->isInlineAsm())
++        continue;
++
+       SmallVector<Type *, 8> Types;
+       for (Value *Arg : CB->args())
+         Types.push_back(Arg->getType());
+diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
+index 3a30980fe..6f582cab2 100644
+--- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
++++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
+@@ -93,3 +93,29 @@ define internal void @caller_not_avx4() {
+ }
+ 
+ declare i64 @caller_unknown_simple(i64)
++
++; This call should get inlined, because the callee only contains
++; inline ASM, not real calls.
++define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 {
++; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
++; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
++; CHECK-NEXT:    [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64
++; CHECK-NEXT:    [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64
++; CHECK-NEXT:    [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64
++; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]])
++; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
++;
++  %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2)
++  ret <8 x i64> %call
++}
++
++define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 {
++  %src = load <8 x i64>, ptr %p0, align 64
++  %a = load <8 x i64>, ptr %p1, align 64
++  %b = load <8 x i64>, ptr %p2, align 64
++  %1 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2
++  ret <8 x i64> %1
++}
++
++attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
++attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
+-- 
+2.33.0
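
Editor's note: for readers unfamiliar with X86TTIImpl::areInlineCompatible, the standalone C++ sketch below models the two checks described in the commit message and the effect of the fix. It is a simplified illustration under assumed names, not LLVM's code: the CallSite struct, the MaxVectorArgBits field, and the 128-bit threshold are invented for the example.

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Hypothetical summary of one call site in the callee: the widest vector
// argument it passes (in bits) and whether the "call" is really inline asm.
struct CallSite {
  unsigned MaxVectorArgBits;
  bool IsInlineAsm;
};

// Simplified model of the inline-compatibility decision.
bool areInlineCompatible(const std::set<std::string> &CallerFeatures,
                         const std::set<std::string> &CalleeFeatures,
                         const std::vector<CallSite> &CalleeCalls) {
  // Check 1: the caller's features must be a superset of the callee's.
  for (const std::string &Feature : CalleeFeatures)
    if (!CallerFeatures.count(Feature))
      return false;

  // With identical feature sets, no call ABI inside the callee can change.
  if (CallerFeatures == CalleeFeatures)
    return true;

  // Check 2 (crude, as the commit message says): reject real calls passing
  // wide vectors, since the extra features may change how such arguments
  // are passed (e.g. one zmm register vs. two ymm registers).
  for (const CallSite &CS : CalleeCalls) {
    // The fix backported here: inline asm is governed by its constraint
    // string rather than the call ABI, so it is exempt from this check.
    if (CS.IsInlineAsm)
      continue;
    if (CS.MaxVectorArgBits > 128)
      return false;
  }
  return true;
}

int main() {
  std::set<std::string> Caller = {"+avx", "+avx2", "+avx512f", "+avx512dq"};
  std::set<std::string> Callee = {"+avx", "+avx2", "+avx512f"};

  // Callee whose only "call" is 512-bit inline asm: inlinable after the fix.
  std::cout << areInlineCompatible(Caller, Callee, {{512, true}}) << "\n";
  // Callee making a real call with a 512-bit vector argument: still rejected.
  std::cout << areInlineCompatible(Caller, Callee, {{512, false}}) << "\n";
}

Before the fix, the first case was rejected as well; that is exactly what the CB->isInlineAsm() early-continue in X86TargetTransformInfo.cpp and the caller_inline_asm/callee_inline_asm test in the patch above exercise.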