summary | refs | log | tree | commit | diff
path: root/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
diff options
context:
space:
mode:
Diffstat (limited to '0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch')
-rw-r--r-- 0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch | 74
1 files changed, 74 insertions, 0 deletions
diff --git a/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch b/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
new file mode 100644
index 0000000..c43bbd3
--- /dev/null
+++ b/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
@@ -0,0 +1,74 @@
+From 678cf3a36644847cac4b0be2d919aba77416088a Mon Sep 17 00:00:00 2001
+From: Nikita Popov <npopov@redhat.com>
+Date: Mon, 04 Mar 2024 07:00:37 +0800
+Subject: [PATCH] [Backport][X86][Inline] Skip inline asm in inlining target
+ feature check
+
+When inlining across functions with different target features, we
+perform roughly two checks:
+1. The caller features must be a superset of the callee features.
+2. Calls in the callee cannot use types where the target features would
+change the call ABI (e.g. by changing whether something is passed in a
+zmm or two ymm registers). The latter check is very crude right now.
+
+The second check currently also catches inline asm "calls". I believe
+that inline asm should be excluded from this check, as it is independent
+of the usual call ABI and is instead governed by the inline asm
+constraint string.
+---
+ .../lib/Target/X86/X86TargetTransformInfo.cpp | 4 +++
+ .../Inline/X86/call-abi-compatibility.ll | 26 +++++++++++++++++++
+ 2 files changed, 30 insertions(+)
+
+diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+index 129a2646d..9c7954230 100644
+--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
++++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+@@ -6046,6 +6046,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
+
+ for (const Instruction &I : instructions(Callee)) {
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
++ // Having more target features is fine for inline ASM.
++ if (CB->isInlineAsm())
++ continue;
++
+ SmallVector<Type *, 8> Types;
+ for (Value *Arg : CB->args())
+ Types.push_back(Arg->getType());
+diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
+index 3a30980fe..6f582cab2 100644
+--- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
++++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
+@@ -93,3 +93,29 @@ define internal void @caller_not_avx4() {
+ }
+
+ declare i64 @caller_unknown_simple(i64)
++
++; This call should get inlined, because the callee only contains
++; inline ASM, not real calls.
++define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 {
++; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
++; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
++; CHECK-NEXT: [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64
++; CHECK-NEXT: [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64
++; CHECK-NEXT: [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64
++; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]])
++; CHECK-NEXT: ret <8 x i64> [[TMP1]]
++;
++ %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2)
++ ret <8 x i64> %call
++}
++
++define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 {
++ %src = load <8 x i64>, ptr %p0, align 64
++ %a = load <8 x i64>, ptr %p1, align 64
++ %b = load <8 x i64>, ptr %p2, align 64
++ %1 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2
++ ret <8 x i64> %1
++}
++
++attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
++attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
+--
+2.33.0
+