author     CoprDistGit <infra@openeuler.org>  2024-08-18 17:52:37 +0000
committer  CoprDistGit <infra@openeuler.org>  2024-08-18 17:52:37 +0000
commit     fc3df9d7d0ac60faa3bd55068b68a1711f467f3f (patch)
tree       dbab988e007f32829af4ace1f726c0f0c6c25b4b /0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch
parent     c017c1889f2a79f52676011db04bcbf5aba4e177 (diff)

automatic import of llvm (openeuler23.09)

Diffstat (limited to '0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch'):
 -rw-r--r--  0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch | 2474
 1 file changed, 2474 insertions(+), 0 deletions(-)
diff --git a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch
new file mode 100644
index 0000000..e40be81
--- /dev/null
+++ b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch
@@ -0,0 +1,2474 @@
+From 0bce68310dc0ff6a09ec2cf5c3ae32400c631324 Mon Sep 17 00:00:00 2001
+From: zhanglimin <zhanglimin@loongson.cn>
+Date: Tue, 12 Sep 2023 09:51:16 +0800
+Subject: [PATCH 01/14] [sanitizer][msan] VarArgHelper for loongarch64
+
+This patch adds support for variadic arguments on loongarch64, based on
+the MIPS64 implementation. All `check-msan` tests pass.
+
+Reviewed By: vitalybuka
+
+Differential Revision: https://reviews.llvm.org/D158587
+
+(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3)
+---
+ .../Instrumentation/MemorySanitizer.cpp | 7 ++
+ .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++
+ 2 files changed, 85 insertions(+)
+ create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
+
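+[Illustration, not part of the upstream commit] A minimal user-level sketch
+of what this support enables, assuming clang targeting
+loongarch64-unknown-linux-gnu with -fsanitize=memory; file and function
+names below are made up for the example. Without the vararg helper the
+shadow of `uninit` is not propagated through the variadic call; with it,
+MSan reports the branch on the returned value as a use of an
+uninitialized value.
+
+// build (sketch): clang++ --target=loongarch64-unknown-linux-gnu \
+//                 -fsanitize=memory -O1 msan-vararg-demo.cpp
+#include <cstdarg>
+#include <cstdio>
+
+static int pick_first(int count, ...) {
+  va_list ap;
+  va_start(ap, count);
+  int first = va_arg(ap, int); // shadow is read back from __msan_va_arg_tls
+  va_end(ap);
+  return first;
+}
+
+int main() {
+  int uninit;                    // deliberately left uninitialized
+  if (pick_first(1, uninit) > 0) // MSan: use-of-uninitialized-value here
+    puts("positive");
+  return 0;
+}
+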
+diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+index 83d90049abc3..362fd6e4151f 100644
+--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
+ };
+
+ /// MIPS64-specific implementation of VarArgHelper.
++/// NOTE: This is also used for LoongArch64.
+ struct VarArgMIPS64Helper : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper {
+ }
+ };
+
++// Loongarch64 is not a MIPS, but the current vargs calling convention matches
++// the MIPS.
++using VarArgLoongArch64Helper = VarArgMIPS64Helper;
++
+ /// A no-op implementation of VarArgHelper.
+ struct VarArgNoOpHelper : public VarArgHelper {
+ VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
+@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ return new VarArgPowerPC64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == Triple::systemz)
+ return new VarArgSystemZHelper(Func, Msan, Visitor);
++ else if (TargetTriple.isLoongArch64())
++ return new VarArgLoongArch64Helper(Func, Msan, Visitor);
+ else
+ return new VarArgNoOpHelper(Func, Msan, Visitor);
+ }
+diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
+new file mode 100644
+index 000000000000..8a4ab59588ad
+--- /dev/null
++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
+@@ -0,0 +1,78 @@
++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
++
++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
++target triple = "loongarch64-unknown-linux-gnu"
++
++;; First, check allocation of the save area.
++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
++declare void @llvm.va_start(ptr) #2
++declare void @llvm.va_end(ptr) #2
++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
++define i32 @foo(i32 %guard, ...) {
++; CHECK-LABEL: @foo
++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]]
++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]]
++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false)
++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800)
++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false)
++;
++ %vl = alloca ptr, align 8
++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl)
++ call void @llvm.va_start(ptr %vl)
++ call void @llvm.va_end(ptr %vl)
++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl)
++ ret i32 0
++}
++
++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
++;; array.
++define i32 @bar() {
++; CHECK-LABEL: @bar
++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8
++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
++;
++ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
++ ret i32 %1
++}
++
++;; Check multiple fixed arguments.
++declare i32 @foo2(i32 %g1, i32 %g2, ...)
++define i32 @bar2() {
++; CHECK-LABEL: @bar2
++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8
++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
++;
++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
++ ret i32 %1
++}
++
++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
++;; passed to a variadic function.
++declare i64 @sum(i64 %n, ...)
++define dso_local i64 @many_args() {
++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed.
++; CHECK-LABEL: @many_args
++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792)
++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800)
++;
++entry:
++ %ret = call i64 (i64, ...) @sum(i64 120,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1
++ )
++ ret i64 %ret
++}
+--
+2.20.1
+
+
+From f1265a12fa947b79967552ab520f904486c76353 Mon Sep 17 00:00:00 2001
+From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com>
+Date: Thu, 28 Sep 2023 15:26:18 +0800
+Subject: [PATCH 02/14] [LowerTypeTests] Add loongarch64 to CFI jumptables
+ (#67312)
+
+This patch implements jump tables for loongarch64.
+
+(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18)
+---
+ llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++-
+ llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++
+ llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++
+ 3 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+index 9b4b3efd7283..a89d57d12615 100644
+--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
++++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4;
+ static const unsigned kARMBTIJumpTableEntrySize = 8;
+ static const unsigned kARMv6MJumpTableEntrySize = 16;
+ static const unsigned kRISCVJumpTableEntrySize = 8;
++static const unsigned kLOONGARCH64JumpTableEntrySize = 8;
+
+ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
+ switch (JumpTableArch) {
+@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
+ case Triple::riscv32:
+ case Triple::riscv64:
+ return kRISCVJumpTableEntrySize;
++ case Triple::loongarch64:
++ return kLOONGARCH64JumpTableEntrySize;
+ default:
+ report_fatal_error("Unsupported architecture for jump tables");
+ }
+@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry(
+ } else if (JumpTableArch == Triple::riscv32 ||
+ JumpTableArch == Triple::riscv64) {
+ AsmOS << "tail $" << ArgIndex << "@plt\n";
++ } else if (JumpTableArch == Triple::loongarch64) {
++ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n"
++ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n";
+ } else {
+ report_fatal_error("Unsupported architecture for jump tables");
+ }
+@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
+ if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
+ Arch == Triple::thumb || Arch == Triple::aarch64 ||
+- Arch == Triple::riscv32 || Arch == Triple::riscv64)
++ Arch == Triple::riscv32 || Arch == Triple::riscv64 ||
++ Arch == Triple::loongarch64)
+ buildBitSetsFromFunctionsNative(TypeIds, Functions);
+ else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
+ buildBitSetsFromFunctionsWASM(TypeIds, Functions);
+diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll
+index ff69abacc8e9..c765937f1991 100644
+--- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll
++++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll
+@@ -4,6 +4,7 @@
+ ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s
+ ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s
+ ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s
++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s
+
+ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) {
+ ; X86: define private void @[[JT]]() #{{.*}} align 8 {
+ ; ARM: define private void @[[JT]]() #{{.*}} align 4 {
+ ; RISCV: define private void @[[JT]]() #{{.*}} align 8 {
++; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 {
+
+ ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" {
+ ; CHECK-NEXT: entry:
+diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll
+index 968c9d434eb2..802b88d92977 100644
+--- a/llvm/test/Transforms/LowerTypeTests/function.ll
++++ b/llvm/test/Transforms/LowerTypeTests/function.ll
+@@ -5,6 +5,7 @@
+ ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
+ ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
+ ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s
++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s
+
+ ; The right format for Arm jump tables depends on the selected
+ ; subtarget, so we can't get these tests right without the Arm target
+@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64"
+ ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
+ ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1)
+ ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
++; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
+
+ ; NATIVE: define hidden void @f.cfi()
+ ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]]
+@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) {
+ ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+ ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 {
+ ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
++; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
+
+ ; X86: jmp ${0:c}@plt
+ ; X86-SAME: int3
+@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) {
+ ; RISCV: tail $0@plt
+ ; RISCV-SAME: tail $1@plt
+
++; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0)
++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0)
++; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1)
++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1)
++
+ ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi)
+
+ ; X86-LINUX: attributes #[[ATTR]] = { naked nocf_check nounwind }
+@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) {
+ ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }
+ ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" }
+ ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" }
++; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind }
+
+ ; WASM32: ![[I0]] = !{i64 1}
+ ; WASM32: ![[I1]] = !{i64 2}
+--
+2.20.1
+
+
+From 6f3143e1ad0bb759b7519af81994ed3c71dcf52b Mon Sep 17 00:00:00 2001
+From: wanglei <wanglei@loongson.cn>
+Date: Fri, 20 Oct 2023 10:44:55 +0800
+Subject: [PATCH 03/14] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR
+
+The immediate argument should be a target constant (`timm`).
+
+(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef)
+---
+ llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+index b2c4bb812ba5..166379d7d592 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>;
+ /// Intrinsics
+
+ def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12),
+- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
+ def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12),
+- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
+ def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>;
+ def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>;
+ def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>;
+@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk),
+ def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk),
+ (ASRTGT_D GPR:$rj, GPR:$rk)>;
+ def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8),
+- (LDDIR GPR:$rj, uimm8:$imm8)>;
++ (LDDIR GPR:$rj, timm:$imm8)>;
+ def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8),
+- (LDPTE GPR:$rj, uimm8:$imm8)>;
++ (LDPTE GPR:$rj, timm:$imm8)>;
+ } // Predicates = [IsLA64]
+
+ //===----------------------------------------------------------------------===//
+--
+2.20.1
+
+
+From d90b85e94180543fd1789f9e26d7931f2329069b Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Fri, 10 Nov 2023 15:54:33 +0800
+Subject: [PATCH 04/14] [LoongArch][MC] Refine MCInstrAnalysis based on
+ registers used (#71276)
+
+MCInstrAnalysis can return properties of instructions (e.g., isCall(),
+isBranch(), ...) based on the information that MCInstrDesc gets from the
+*InstrInfo*.td files. That information is keyed on opcodes only, but JIRL
+has different properties depending on the registers it uses.
+
+So this patch refines several MCInstrAnalysis methods: isTerminator,
+isCall, isReturn, isBranch, isUnconditionalBranch and isIndirectBranch.
+
+This also allows BOLT, which will be supported on LoongArch later, to get
+the right instruction information.
+
+(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1)
+---
+ .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++
+ .../unittests/Target/LoongArch/CMakeLists.txt | 1 +
+ .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++
+ 3 files changed, 184 insertions(+)
+ create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+
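+[Illustration, not part of the upstream commit] The register conventions
+behind the refined analysis: `jirl $zero, $rj, imm` is an indirect branch,
+`jirl $ra, $rj, imm` is an indirect call, and `jirl $zero, $ra, 0` is the
+conventional return. A standalone sketch using the same MC APIs as the new
+unit test (assumes an in-tree build with the LoongArch target enabled):
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include <cstdio>
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+int main() {
+  LLVMInitializeLoongArchTargetInfo();
+  LLVMInitializeLoongArchTarget();
+  LLVMInitializeLoongArchTargetMC();
+
+  std::string Error;
+  const Target *T =
+      TargetRegistry::lookupTarget("loongarch64-unknown-linux-gnu", Error);
+  std::unique_ptr<const MCInstrInfo> MCII(T->createMCInstrInfo());
+  std::unique_ptr<const MCInstrAnalysis> MCIA(
+      T->createMCInstrAnalysis(MCII.get()));
+
+  // jirl $ra, $a0, 0 -> writes the link register, so it is a call.
+  MCInst Call = MCInstBuilder(LoongArch::JIRL)
+                    .addReg(LoongArch::R1).addReg(LoongArch::R4).addImm(0);
+  // jirl $zero, $ra, 0 -> no link, jumps through $ra, so it is a return.
+  MCInst Ret = MCInstBuilder(LoongArch::JIRL)
+                   .addReg(LoongArch::R0).addReg(LoongArch::R1).addImm(0);
+
+  printf("isCall=%d isReturn=%d\n", MCIA->isCall(Call), MCIA->isReturn(Ret));
+  return 0;
+}
+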
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+index 942e667bc261..d580c3457fec 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+@@ -104,6 +104,82 @@ public:
+
+ return false;
+ }
++
++ bool isTerminator(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isTerminator(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() == LoongArch::R0;
++ }
++ }
++
++ bool isCall(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isCall(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() != LoongArch::R0;
++ }
++ }
++
++ bool isReturn(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isReturn(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
++ Inst.getOperand(1).getReg() == LoongArch::R1;
++ }
++ }
++
++ bool isBranch(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isBranch(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
++ Inst.getOperand(1).getReg() != LoongArch::R1;
++ }
++ }
++
++ bool isUnconditionalBranch(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isUnconditionalBranch(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
++ Inst.getOperand(1).getReg() != LoongArch::R1;
++ }
++ }
++
++ bool isIndirectBranch(const MCInst &Inst) const override {
++ if (MCInstrAnalysis::isIndirectBranch(Inst))
++ return true;
++
++ switch (Inst.getOpcode()) {
++ default:
++ return false;
++ case LoongArch::JIRL:
++ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
++ Inst.getOperand(1).getReg() != LoongArch::R1;
++ }
++ }
+ };
+
+ } // end namespace
+diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt
+index fef4f8e15461..e6f8ec073721 100644
+--- a/llvm/unittests/Target/LoongArch/CMakeLists.txt
++++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt
+@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS
+
+ add_llvm_target_unittest(LoongArchTests
+ InstSizes.cpp
++ MCInstrAnalysisTest.cpp
+ )
+
+ set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
+diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+new file mode 100644
+index 000000000000..6a208d274a0d
+--- /dev/null
++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+@@ -0,0 +1,107 @@
++//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "llvm/MC/MCInstrAnalysis.h"
++#include "MCTargetDesc/LoongArchMCTargetDesc.h"
++#include "llvm/MC/MCInstBuilder.h"
++#include "llvm/MC/TargetRegistry.h"
++#include "llvm/Support/TargetSelect.h"
++
++#include "gtest/gtest.h"
++
++#include <memory>
++
++using namespace llvm;
++
++namespace {
++
++class InstrAnalysisTest : public testing::TestWithParam<const char *> {
++protected:
++ std::unique_ptr<const MCInstrInfo> Info;
++ std::unique_ptr<const MCInstrAnalysis> Analysis;
++
++ static void SetUpTestSuite() {
++ LLVMInitializeLoongArchTargetInfo();
++ LLVMInitializeLoongArchTarget();
++ LLVMInitializeLoongArchTargetMC();
++ }
++
++ InstrAnalysisTest() {
++ std::string Error;
++ const Target *TheTarget =
++ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error);
++ Info = std::unique_ptr<const MCInstrInfo>(TheTarget->createMCInstrInfo());
++ Analysis = std::unique_ptr<const MCInstrAnalysis>(
++ TheTarget->createMCInstrAnalysis(Info.get()));
++ }
++};
++
++} // namespace
++
++static MCInst beq() {
++ return MCInstBuilder(LoongArch::BEQ)
++ .addReg(LoongArch::R0)
++ .addReg(LoongArch::R1)
++ .addImm(32);
++}
++
++static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); }
++
++static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
++ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16);
++}
++
++TEST_P(InstrAnalysisTest, IsTerminator) {
++ EXPECT_TRUE(Analysis->isTerminator(beq()));
++ EXPECT_FALSE(Analysis->isTerminator(bl()));
++ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0)));
++ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5)));
++}
++
++TEST_P(InstrAnalysisTest, IsCall) {
++ EXPECT_FALSE(Analysis->isCall(beq()));
++ EXPECT_TRUE(Analysis->isCall(bl()));
++ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1)));
++ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0)));
++}
++
++TEST_P(InstrAnalysisTest, IsReturn) {
++ EXPECT_FALSE(Analysis->isReturn(beq()));
++ EXPECT_FALSE(Analysis->isReturn(bl()));
++ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1)));
++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0)));
++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1)));
++}
++
++TEST_P(InstrAnalysisTest, IsBranch) {
++ EXPECT_TRUE(Analysis->isBranch(beq()));
++ EXPECT_FALSE(Analysis->isBranch(bl()));
++ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0)));
++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1)));
++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1)));
++}
++
++TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
++ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
++ EXPECT_FALSE(
++ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1)));
++}
++
++TEST_P(InstrAnalysisTest, IsIndirectBranch) {
++ EXPECT_FALSE(Analysis->isIndirectBranch(beq()));
++ EXPECT_FALSE(Analysis->isIndirectBranch(bl()));
++ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0)));
++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1)));
++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1)));
++}
++
++INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest,
++ testing::Values("loongarch32", "loongarch64"));
+--
+2.20.1
+
+
+From 4d3ba0892d66b21f6a8a72f1d787e42a64be8867 Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Wed, 15 Nov 2023 11:12:30 +0800
+Subject: [PATCH 05/14] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for
+ instruction 'b' (#71903)
+
+The tests for 'b' that are marked with FIXME are incorrect; the
+following patch will fix them.
+
+(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4)
+---
+ .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+index 6a208d274a0d..6e1919fc2261 100644
+--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+@@ -50,6 +50,8 @@ static MCInst beq() {
+ .addImm(32);
+ }
+
++static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); }
++
+ static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); }
+
+ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
+@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
+
+ TEST_P(InstrAnalysisTest, IsTerminator) {
+ EXPECT_TRUE(Analysis->isTerminator(beq()));
++ EXPECT_TRUE(Analysis->isTerminator(b()));
+ EXPECT_FALSE(Analysis->isTerminator(bl()));
+ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0)));
+ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5)));
+@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) {
+
+ TEST_P(InstrAnalysisTest, IsCall) {
+ EXPECT_FALSE(Analysis->isCall(beq()));
++ EXPECT_FALSE(Analysis->isCall(b()));
+ EXPECT_TRUE(Analysis->isCall(bl()));
+ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1)));
+ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0)));
+@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) {
+
+ TEST_P(InstrAnalysisTest, IsReturn) {
+ EXPECT_FALSE(Analysis->isReturn(beq()));
++ EXPECT_FALSE(Analysis->isReturn(b()));
+ EXPECT_FALSE(Analysis->isReturn(bl()));
+ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1)));
+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0)));
+@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) {
+
+ TEST_P(InstrAnalysisTest, IsBranch) {
+ EXPECT_TRUE(Analysis->isBranch(beq()));
++ EXPECT_TRUE(Analysis->isBranch(b()));
+ EXPECT_FALSE(Analysis->isBranch(bl()));
+ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0)));
+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1)));
+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1)));
+ }
+
++TEST_P(InstrAnalysisTest, IsConditionalBranch) {
++ EXPECT_TRUE(Analysis->isConditionalBranch(beq()));
++ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is
++ // wrong. The following patch will fix it.
++ EXPECT_TRUE(Analysis->isConditionalBranch(b()));
++ EXPECT_FALSE(Analysis->isConditionalBranch(bl()));
++}
++
+ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
++ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is
++ // wrong. The following patch will fix it.
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(b()));
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
+ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
+@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
+
+ TEST_P(InstrAnalysisTest, IsIndirectBranch) {
+ EXPECT_FALSE(Analysis->isIndirectBranch(beq()));
++ EXPECT_FALSE(Analysis->isIndirectBranch(b()));
+ EXPECT_FALSE(Analysis->isIndirectBranch(bl()));
+ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0)));
+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1)));
+--
+2.20.1
+
+
+From 034d4087be71c54248fff1bf7eae66291671776a Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Thu, 16 Nov 2023 14:01:58 +0800
+Subject: [PATCH 06/14] [LoongArch] Set isBarrier to true for instruction 'b'
+ (#72339)
+
+The instruction "b offs26" represents an unconditional branch on
+LoongArch. Set isBarrier to 1 for it in tablegen, so that MCInstrAnalysis
+returns the correct result.
+
+Fixes https://github.com/llvm/llvm-project/pull/71903.
+
+(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f)
+---
+ llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 +
+ llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+index 166379d7d592..05ae36a9781d 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+@@ -586,6 +586,7 @@ class Br_I26<bits<32> op>
+ : FmtI26<op, (outs), (ins simm26_b:$imm26), "$imm26"> {
+ let isBranch = 1;
+ let isTerminator = 1;
++ let isBarrier = 1;
+ }
+ } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+index 6e1919fc2261..468ee79615d6 100644
+--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
+@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) {
+
+ TEST_P(InstrAnalysisTest, IsConditionalBranch) {
+ EXPECT_TRUE(Analysis->isConditionalBranch(beq()));
+- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is
+- // wrong. The following patch will fix it.
+- EXPECT_TRUE(Analysis->isConditionalBranch(b()));
++ EXPECT_FALSE(Analysis->isConditionalBranch(b()));
+ EXPECT_FALSE(Analysis->isConditionalBranch(bl()));
+ }
+
+ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
+- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is
+- // wrong. The following patch will fix it.
+- EXPECT_FALSE(Analysis->isUnconditionalBranch(b()));
++ EXPECT_TRUE(Analysis->isUnconditionalBranch(b()));
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
+ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
+--
+2.20.1
+
+
+From 701109dc419b8d07cd5254268d848dee1278b9ad Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Tue, 21 Nov 2023 08:34:52 +0800
+Subject: [PATCH 07/14] [LoongArch][MC] Pre-commit tests for instr bl fixupkind
+ testing (#72826)
+
+This patch tests whether the fixup kind for bl is returned correctly.
+When BL carries target-flags(loongarch-call), there is no error, but
+without this flag an assertion failure appears. The test is therefore
+tagged as "Expectedly Failed" until the following patch fixes it.
+
+(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a)
+---
+ .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++
+ 1 file changed, 66 insertions(+)
+ create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
+
+diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
+new file mode 100644
+index 000000000000..2c1d41be7711
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
+@@ -0,0 +1,66 @@
++## Tagged as "Expectedly Failed" until the following patch fix it
++# XFAIL: *
++# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \
++# RUN: llvm-objdump -d - | FileCheck %s
++
++# REQUIRES: asserts
++
++## Check that bl can get fixupkind correctly.
++## When BL has target-flags(loongarch-call), there is no error. But without
++## this flag, an assertion error will appear:
++## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed.
++
++--- |
++ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
++ target triple = "loongarch64"
++
++ define dso_local void @test_bl_fixupkind_with_flag() {
++ ; CHECK-LABEL: test_bl_fixupkind_with_flag
++ ; CHECK: addi.d $sp, $sp, -16
++ ; CHECK-NEXT: st.d $ra, $sp, 8
++ ; CHECK-NEXT: bl 0 <test_bl_fixupkind_with_flag+0x8>
++ ; CHECK-NEXT: ld.d $ra, $sp, 8
++ ; CHECK-NEXT: addi.d $sp, $sp, 16
++ ; CHECK-NEXT: ret
++ entry:
++ call void @foo()
++ ret void
++ }
++
++ define dso_local void @test_bl_fixupkind_without_flag() {
++ ; CHECK-LABEL: test_bl_fixupkind_without_flag
++ ; CHECK: addi.d $sp, $sp, -16
++ ; CHECK-NEXT: st.d $ra, $sp, 8
++ ; CHECK-NEXT: bl 0 <test_bl_fixupkind_without_flag+0x8>
++ ; CHECK-NEXT: ld.d $ra, $sp, 8
++ ; CHECK-NEXT: addi.d $sp, $sp, 16
++ ; CHECK-NEXT: ret
++ entry:
++ call void @foo()
++ ret void
++ }
++
++ declare dso_local void @foo(...)
++...
++---
++name: test_bl_fixupkind_with_flag
++tracksRegLiveness: true
++body: |
++ bb.0.entry:
++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3
++ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3
++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
++ PseudoRET
++
++...
++---
++name: test_bl_fixupkind_without_flag
++tracksRegLiveness: true
++body: |
++ bb.0.entry:
++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3
++ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3
++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
++ PseudoRET
++
++...
+--
+2.20.1
+
+
+From a5bf03107b8738b0fab521d7718bed863056134b Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Tue, 21 Nov 2023 19:00:29 +0800
+Subject: [PATCH 08/14] [LoongArch][MC] Support to get the FixupKind for BL
+ (#72938)
+
+Previously, BOLT could not get the FixupKind for BL correctly, because
+BOLT cannot see the target-flags on BL. Add support for this directly in
+the MCCodeEmitter.
+
+Fixes https://github.com/llvm/llvm-project/pull/72826.
+
+(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9)
+---
+ .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 +
+ llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+index 08c0820cb862..09d92ac9aa3a 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
+ FixupKind = LoongArch::fixup_loongarch_b21;
+ break;
+ case LoongArch::B:
++ case LoongArch::BL:
+ FixupKind = LoongArch::fixup_loongarch_b26;
+ break;
+ }
+diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
+index 2c1d41be7711..70cd5fb8d7eb 100644
+--- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
+@@ -1,14 +1,10 @@
+-## Tagged as "Expectedly Failed" until the following patch fix it
+-# XFAIL: *
+ # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \
+ # RUN: llvm-objdump -d - | FileCheck %s
+
+ # REQUIRES: asserts
+
+-## Check that bl can get fixupkind correctly.
+-## When BL has target-flags(loongarch-call), there is no error. But without
+-## this flag, an assertion error will appear:
+-## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed.
++## Check that bl can get fixupkind correctly, whether BL contains
++## target-flags(loongarch-call) or not.
+
+ --- |
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+--
+2.20.1
+
+
+From 20421e57af53d963a95c6c318f71f9399d241188 Mon Sep 17 00:00:00 2001
+From: ZhaoQi <zhaoqi01@loongson.cn>
+Date: Thu, 23 Nov 2023 16:38:41 +0800
+Subject: [PATCH 09/14] [LoongArch][MC] Modify branch evaluation for
+ MCInstrAnalysis (#73205)
+
+Function evaluateBranch() computes the target address for a given branch
+instruction and returns true on success. The target address of an
+indirect branch cannot be computed by simply adding the immediate, so
+rule indirect branches out and just return false.
+
+This patch also adds objdump tests which capture the current state of
+support for printing branch targets. Without this patch, the result of
+"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 <foo+0x64>". That is
+incorrect, because this instruction is an indirect branch whose target
+address depends on both the register value and the immediate. After this
+patch the output is correct, albeit with less detail.
+
+(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c)
+---
+ .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +-
+ .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++
+ .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 +
+ 3 files changed, 80 insertions(+), 1 deletion(-)
+ create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
+ create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
+
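+[Illustration, not part of the upstream commit] With this change,
+evaluateBranch() succeeds only for direct branches and calls, where the
+target is simply Addr plus the branch immediate; for the indirect `jirl`
+forms it now returns false, so llvm-objdump prints no target for them. A
+small sketch against the MCInstrAnalysis interface (the helper name is
+made up):
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include <cstdint>
+#include <cstdio>
+
+using namespace llvm;
+
+// Prints " <target>" for direct branches/calls and nothing for indirect
+// ones, mirroring the llvm-objdump behaviour checked in branches.s.
+void printBranchTarget(const MCInstrAnalysis &MCIA, const MCInst &Inst,
+                       uint64_t Addr, uint64_t Size) {
+  uint64_t Target;
+  if (MCIA.evaluateBranch(Inst, Addr, Size, Target))
+    printf(" <%#llx>", (unsigned long long)Target); // e.g. beq/b/bl: Addr + imm
+  // For "jirl $zero, $a0, 4", isIndirectBranch() is true, evaluateBranch()
+  // fails, and no target annotation is emitted.
+}
+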
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+index d580c3457fec..a4e6a09863e6 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+@@ -97,7 +97,8 @@ public:
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ unsigned NumOps = Inst.getNumOperands();
+- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) {
++ if ((isBranch(Inst) && !isIndirectBranch(Inst)) ||
++ Inst.getOpcode() == LoongArch::BL) {
+ Target = Addr + Inst.getOperand(NumOps - 1).getImm();
+ return true;
+ }
+diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
+new file mode 100644
+index 000000000000..8cb00aef9954
+--- /dev/null
++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
+@@ -0,0 +1,76 @@
++# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \
++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s
++# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \
++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s
++
++# CHECK-LABEL: <foo>:
++foo:
++# CHECK: beq $a0, $a1, 108 <foo+0x6c>
++beq $a0, $a1, .Llocal
++# CHECK: bne $a0, $a1, 104 <foo+0x6c>
++bne $a0, $a1, .Llocal
++# CHECK: blt $a0, $a1, 100 <foo+0x6c>
++blt $a0, $a1, .Llocal
++# CHECK: bltu $a0, $a1, 96 <foo+0x6c>
++bltu $a0, $a1, .Llocal
++# CHECK: bge $a0, $a1, 92 <foo+0x6c>
++bge $a0, $a1, .Llocal
++# CHECK: bgeu $a0, $a1, 88 <foo+0x6c>
++bgeu $a0, $a1, .Llocal
++# CHECK: beqz $a0, 84 <foo+0x6c>
++beqz $a0, .Llocal
++# CHECK: bnez $a0, 80 <foo+0x6c>
++bnez $a0, .Llocal
++# CHECK: bceqz $fcc6, 76 <foo+0x6c>
++bceqz $fcc6, .Llocal
++# CHECK: bcnez $fcc6, 72 <foo+0x6c>
++bcnez $fcc6, .Llocal
++
++# CHECK: beq $a0, $a1, 76 <bar>
++beq $a0, $a1, bar
++# CHECK: bne $a0, $a1, 72 <bar>
++bne $a0, $a1, bar
++# CHECK: blt $a0, $a1, 68 <bar>
++blt $a0, $a1, bar
++# CHECK: bltu $a0, $a1, 64 <bar>
++bltu $a0, $a1, bar
++# CHECK: bge $a0, $a1, 60 <bar>
++bge $a0, $a1, bar
++# CHECK: bgeu $a0, $a1, 56 <bar>
++bgeu $a0, $a1, bar
++# CHECK: beqz $a0, 52 <bar>
++beqz $a0, bar
++# CHECK: bnez $a0, 48 <bar>
++bnez $a0, bar
++# CHECK: bceqz $fcc6, 44 <bar>
++bceqz $fcc6, bar
++# CHECK: bcnez $fcc6, 40 <bar>
++bcnez $fcc6, bar
++
++# CHECK: b 28 <foo+0x6c>
++b .Llocal
++# CHECK: b 32 <bar>
++b bar
++
++# CHECK: bl 20 <foo+0x6c>
++bl .Llocal
++# CHECK: bl 24 <bar>
++bl bar
++
++# CHECK: jirl $zero, $a0, 4{{$}}
++jirl $zero, $a0, 4
++# CHECK: jirl $ra, $a0, 4{{$}}
++jirl $ra, $a0, 4
++# CHECK: ret
++ret
++
++.Llocal:
++# CHECK: 6c: nop
++# CHECK: nop
++nop
++nop
++
++# CHECK-LABEL: <bar>:
++bar:
++# CHECK: 74: nop
++nop
+diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
+new file mode 100644
+index 000000000000..cc24278acbb4
+--- /dev/null
++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
+@@ -0,0 +1,2 @@
++if not "LoongArch" in config.root.targets:
++ config.unsupported = True
+--
+2.20.1
+
+
+From 0fe85205a8637c6671f423cddd41b712085232ac Mon Sep 17 00:00:00 2001
+From: hev <wangrui@loongson.cn>
+Date: Thu, 23 Nov 2023 15:15:26 +0800
+Subject: [PATCH 10/14] [LoongArch] Precommit a test for smul with overflow
+ (NFC) (#73212)
+
+(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71)
+---
+ .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++
+ 1 file changed, 118 insertions(+)
+ create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+
+diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+new file mode 100644
+index 000000000000..a53e77e5aa4b
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+@@ -0,0 +1,118 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
++
++define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
++; LA32-LABEL: smuloi64:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: .cfi_def_cfa_offset 16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
++; LA32-NEXT: .cfi_offset 1, -4
++; LA32-NEXT: .cfi_offset 22, -8
++; LA32-NEXT: move $fp, $a4
++; LA32-NEXT: st.w $zero, $sp, 4
++; LA32-NEXT: addi.w $a4, $sp, 4
++; LA32-NEXT: bl %plt(__mulodi4)
++; LA32-NEXT: st.w $a1, $fp, 4
++; LA32-NEXT: st.w $a0, $fp, 0
++; LA32-NEXT: ld.w $a0, $sp, 4
++; LA32-NEXT: sltu $a0, $zero, $a0
++; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: smuloi64:
++; LA64: # %bb.0:
++; LA64-NEXT: mul.d $a3, $a0, $a1
++; LA64-NEXT: st.d $a3, $a2, 0
++; LA64-NEXT: mulh.d $a0, $a0, $a1
++; LA64-NEXT: srai.d $a1, $a3, 63
++; LA64-NEXT: xor $a0, $a0, $a1
++; LA64-NEXT: sltu $a0, $zero, $a0
++; LA64-NEXT: ret
++ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
++ %val = extractvalue {i64, i1} %t, 0
++ %obit = extractvalue {i64, i1} %t, 1
++ store i64 %val, ptr %res
++ ret i1 %obit
++}
++
++define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
++; LA32-LABEL: smuloi128:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -64
++; LA32-NEXT: .cfi_def_cfa_offset 64
++; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
++; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
++; LA32-NEXT: .cfi_offset 1, -4
++; LA32-NEXT: .cfi_offset 22, -8
++; LA32-NEXT: move $fp, $a2
++; LA32-NEXT: st.w $zero, $sp, 52
++; LA32-NEXT: ld.w $a2, $a1, 12
++; LA32-NEXT: st.w $a2, $sp, 12
++; LA32-NEXT: ld.w $a2, $a1, 8
++; LA32-NEXT: st.w $a2, $sp, 8
++; LA32-NEXT: ld.w $a2, $a1, 4
++; LA32-NEXT: st.w $a2, $sp, 4
++; LA32-NEXT: ld.w $a1, $a1, 0
++; LA32-NEXT: st.w $a1, $sp, 0
++; LA32-NEXT: ld.w $a1, $a0, 12
++; LA32-NEXT: st.w $a1, $sp, 28
++; LA32-NEXT: ld.w $a1, $a0, 8
++; LA32-NEXT: st.w $a1, $sp, 24
++; LA32-NEXT: ld.w $a1, $a0, 4
++; LA32-NEXT: st.w $a1, $sp, 20
++; LA32-NEXT: ld.w $a0, $a0, 0
++; LA32-NEXT: st.w $a0, $sp, 16
++; LA32-NEXT: addi.w $a0, $sp, 32
++; LA32-NEXT: addi.w $a1, $sp, 16
++; LA32-NEXT: addi.w $a2, $sp, 0
++; LA32-NEXT: addi.w $a3, $sp, 52
++; LA32-NEXT: bl %plt(__muloti4)
++; LA32-NEXT: ld.w $a0, $sp, 44
++; LA32-NEXT: st.w $a0, $fp, 12
++; LA32-NEXT: ld.w $a0, $sp, 40
++; LA32-NEXT: st.w $a0, $fp, 8
++; LA32-NEXT: ld.w $a0, $sp, 36
++; LA32-NEXT: st.w $a0, $fp, 4
++; LA32-NEXT: ld.w $a0, $sp, 32
++; LA32-NEXT: st.w $a0, $fp, 0
++; LA32-NEXT: ld.w $a0, $sp, 52
++; LA32-NEXT: sltu $a0, $zero, $a0
++; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 64
++; LA32-NEXT: ret
++;
++; LA64-LABEL: smuloi128:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.d $sp, $sp, -32
++; LA64-NEXT: .cfi_def_cfa_offset 32
++; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
++; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
++; LA64-NEXT: .cfi_offset 1, -8
++; LA64-NEXT: .cfi_offset 22, -16
++; LA64-NEXT: move $fp, $a4
++; LA64-NEXT: st.d $zero, $sp, 8
++; LA64-NEXT: addi.d $a4, $sp, 8
++; LA64-NEXT: bl %plt(__muloti4)
++; LA64-NEXT: st.d $a1, $fp, 8
++; LA64-NEXT: st.d $a0, $fp, 0
++; LA64-NEXT: ld.d $a0, $sp, 8
++; LA64-NEXT: sltu $a0, $zero, $a0
++; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
++; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
++; LA64-NEXT: addi.d $sp, $sp, 32
++; LA64-NEXT: ret
++ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
++ %val = extractvalue {i128, i1} %t, 0
++ %obit = extractvalue {i128, i1} %t, 1
++ store i128 %val, ptr %res
++ ret i1 %obit
++}
++
++declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
++declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
+--
+2.20.1
+
+
+From e29ff285726046ec46c9005c67ba992e3efc8ace Mon Sep 17 00:00:00 2001
+From: hev <wangrui@loongson.cn>
+Date: Thu, 23 Nov 2023 19:34:50 +0800
+Subject: [PATCH 11/14] [LoongArch] Disable mulodi4 and muloti4 libcalls
+ (#73199)
+
+These library functions only exist in compiler-rt, not libgcc, so the
+resulting calls would fail to link unless we were linking with
+compiler-rt.
+
+Fixes https://github.com/ClangBuiltLinux/linux/issues/1958
+
+(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e)
+---
+ .../LoongArch/LoongArchISelLowering.cpp | 5 +
+ .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++---
+ 2 files changed, 397 insertions(+), 71 deletions(-)
+
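+[Illustration, not part of the upstream commit] The lowering in question
+comes from `llvm.smul.with.overflow`, reached for example through the
+compiler builtin below. On LA32 a checked 64-bit signed multiply (and on
+LA64 the same pattern with `__int128`) used to become a call to
+__mulodi4/__muloti4, which exist only in compiler-rt; with the libcalls
+disabled, the check is expanded inline, as the regenerated
+smul-with-overflow.ll output shows.
+
+#include <cstdio>
+
+// clang lowers this to llvm.smul.with.overflow.i64; returns true on overflow.
+static bool mul_overflows(long long a, long long b, long long *res) {
+  return __builtin_mul_overflow(a, b, res);
+}
+
+int main() {
+  long long r;
+  printf("overflow=%d\n", mul_overflows(1LL << 62, 4, &r) ? 1 : 0);
+  return 0;
+}
+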
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+index f7eacd56c542..ed106cb766bc 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+
+ // Set libcalls.
+ setLibcallName(RTLIB::MUL_I128, nullptr);
++ // The MULO libcall is not part of libgcc, only compiler-rt.
++ setLibcallName(RTLIB::MULO_I64, nullptr);
+ }
+
++ // The MULO libcall is not part of libgcc, only compiler-rt.
++ setLibcallName(RTLIB::MULO_I128, nullptr);
++
+ static const ISD::CondCode FPCCToExpand[] = {
+ ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
+ ISD::SETGE, ISD::SETNE, ISD::SETGT};
+diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+index a53e77e5aa4b..6cba4108d63c 100644
+--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+@@ -5,23 +5,53 @@
+ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
+ ; LA32-LABEL: smuloi64:
+ ; LA32: # %bb.0:
+-; LA32-NEXT: addi.w $sp, $sp, -16
+-; LA32-NEXT: .cfi_def_cfa_offset 16
+-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+-; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+-; LA32-NEXT: .cfi_offset 1, -4
+-; LA32-NEXT: .cfi_offset 22, -8
+-; LA32-NEXT: move $fp, $a4
+-; LA32-NEXT: st.w $zero, $sp, 4
+-; LA32-NEXT: addi.w $a4, $sp, 4
+-; LA32-NEXT: bl %plt(__mulodi4)
+-; LA32-NEXT: st.w $a1, $fp, 4
+-; LA32-NEXT: st.w $a0, $fp, 0
+-; LA32-NEXT: ld.w $a0, $sp, 4
++; LA32-NEXT: srai.w $a5, $a1, 31
++; LA32-NEXT: mul.w $a6, $a2, $a5
++; LA32-NEXT: mulh.wu $a7, $a2, $a5
++; LA32-NEXT: add.w $a7, $a7, $a6
++; LA32-NEXT: mul.w $a5, $a3, $a5
++; LA32-NEXT: add.w $a5, $a7, $a5
++; LA32-NEXT: srai.w $a7, $a3, 31
++; LA32-NEXT: mul.w $t0, $a7, $a1
++; LA32-NEXT: mulh.wu $t1, $a7, $a0
++; LA32-NEXT: add.w $t0, $t1, $t0
++; LA32-NEXT: mul.w $a7, $a7, $a0
++; LA32-NEXT: add.w $t0, $t0, $a7
++; LA32-NEXT: add.w $a5, $t0, $a5
++; LA32-NEXT: mulh.wu $t0, $a0, $a2
++; LA32-NEXT: mul.w $t1, $a1, $a2
++; LA32-NEXT: add.w $t0, $t1, $t0
++; LA32-NEXT: sltu $t1, $t0, $t1
++; LA32-NEXT: mulh.wu $t2, $a1, $a2
++; LA32-NEXT: add.w $t1, $t2, $t1
++; LA32-NEXT: mul.w $t2, $a0, $a3
++; LA32-NEXT: add.w $t0, $t2, $t0
++; LA32-NEXT: sltu $t2, $t0, $t2
++; LA32-NEXT: mulh.wu $t3, $a0, $a3
++; LA32-NEXT: add.w $t2, $t3, $t2
++; LA32-NEXT: add.w $a6, $a7, $a6
++; LA32-NEXT: sltu $a7, $a6, $a7
++; LA32-NEXT: add.w $a5, $a5, $a7
++; LA32-NEXT: mul.w $a0, $a0, $a2
++; LA32-NEXT: mul.w $a2, $a1, $a3
++; LA32-NEXT: mulh.wu $a1, $a1, $a3
++; LA32-NEXT: add.w $a3, $t1, $t2
++; LA32-NEXT: sltu $a7, $a3, $t1
++; LA32-NEXT: add.w $a1, $a1, $a7
++; LA32-NEXT: st.w $a0, $a4, 0
++; LA32-NEXT: add.w $a0, $a2, $a3
++; LA32-NEXT: sltu $a2, $a0, $a2
++; LA32-NEXT: add.w $a1, $a1, $a2
++; LA32-NEXT: st.w $t0, $a4, 4
++; LA32-NEXT: add.w $a1, $a1, $a5
++; LA32-NEXT: add.w $a2, $a0, $a6
++; LA32-NEXT: sltu $a0, $a2, $a0
++; LA32-NEXT: add.w $a0, $a1, $a0
++; LA32-NEXT: srai.w $a1, $t0, 31
++; LA32-NEXT: xor $a0, $a0, $a1
++; LA32-NEXT: xor $a1, $a2, $a1
++; LA32-NEXT: or $a0, $a1, $a0
+ ; LA32-NEXT: sltu $a0, $zero, $a0
+-; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+-; LA32-NEXT: addi.w $sp, $sp, 16
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: smuloi64:
+@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
+ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
+ ; LA32-LABEL: smuloi128:
+ ; LA32: # %bb.0:
+-; LA32-NEXT: addi.w $sp, $sp, -64
+-; LA32-NEXT: .cfi_def_cfa_offset 64
+-; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+-; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
++; LA32-NEXT: addi.w $sp, $sp, -96
++; LA32-NEXT: .cfi_def_cfa_offset 96
++; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill
++; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill
++; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill
+ ; LA32-NEXT: .cfi_offset 1, -4
+ ; LA32-NEXT: .cfi_offset 22, -8
+-; LA32-NEXT: move $fp, $a2
+-; LA32-NEXT: st.w $zero, $sp, 52
+-; LA32-NEXT: ld.w $a2, $a1, 12
+-; LA32-NEXT: st.w $a2, $sp, 12
+-; LA32-NEXT: ld.w $a2, $a1, 8
+-; LA32-NEXT: st.w $a2, $sp, 8
+-; LA32-NEXT: ld.w $a2, $a1, 4
+-; LA32-NEXT: st.w $a2, $sp, 4
+-; LA32-NEXT: ld.w $a1, $a1, 0
+-; LA32-NEXT: st.w $a1, $sp, 0
+-; LA32-NEXT: ld.w $a1, $a0, 12
+-; LA32-NEXT: st.w $a1, $sp, 28
+-; LA32-NEXT: ld.w $a1, $a0, 8
+-; LA32-NEXT: st.w $a1, $sp, 24
+-; LA32-NEXT: ld.w $a1, $a0, 4
+-; LA32-NEXT: st.w $a1, $sp, 20
+-; LA32-NEXT: ld.w $a0, $a0, 0
+-; LA32-NEXT: st.w $a0, $sp, 16
+-; LA32-NEXT: addi.w $a0, $sp, 32
+-; LA32-NEXT: addi.w $a1, $sp, 16
+-; LA32-NEXT: addi.w $a2, $sp, 0
+-; LA32-NEXT: addi.w $a3, $sp, 52
+-; LA32-NEXT: bl %plt(__muloti4)
+-; LA32-NEXT: ld.w $a0, $sp, 44
+-; LA32-NEXT: st.w $a0, $fp, 12
+-; LA32-NEXT: ld.w $a0, $sp, 40
+-; LA32-NEXT: st.w $a0, $fp, 8
+-; LA32-NEXT: ld.w $a0, $sp, 36
+-; LA32-NEXT: st.w $a0, $fp, 4
+-; LA32-NEXT: ld.w $a0, $sp, 32
+-; LA32-NEXT: st.w $a0, $fp, 0
+-; LA32-NEXT: ld.w $a0, $sp, 52
++; LA32-NEXT: .cfi_offset 23, -12
++; LA32-NEXT: .cfi_offset 24, -16
++; LA32-NEXT: .cfi_offset 25, -20
++; LA32-NEXT: .cfi_offset 26, -24
++; LA32-NEXT: .cfi_offset 27, -28
++; LA32-NEXT: .cfi_offset 28, -32
++; LA32-NEXT: .cfi_offset 29, -36
++; LA32-NEXT: .cfi_offset 30, -40
++; LA32-NEXT: .cfi_offset 31, -44
++; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ld.w $a6, $a1, 0
++; LA32-NEXT: ld.w $a7, $a0, 0
++; LA32-NEXT: mulh.wu $a3, $a7, $a6
++; LA32-NEXT: ld.w $a5, $a0, 4
++; LA32-NEXT: mul.w $a4, $a5, $a6
++; LA32-NEXT: add.w $a3, $a4, $a3
++; LA32-NEXT: sltu $a4, $a3, $a4
++; LA32-NEXT: mulh.wu $t0, $a5, $a6
++; LA32-NEXT: add.w $a4, $t0, $a4
++; LA32-NEXT: ld.w $t0, $a1, 4
++; LA32-NEXT: mul.w $t1, $a7, $t0
++; LA32-NEXT: add.w $a3, $t1, $a3
++; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill
++; LA32-NEXT: sltu $t1, $a3, $t1
++; LA32-NEXT: mulh.wu $t2, $a7, $t0
++; LA32-NEXT: add.w $t1, $t2, $t1
++; LA32-NEXT: ld.w $t4, $a0, 12
++; LA32-NEXT: ld.w $t2, $a0, 8
++; LA32-NEXT: ld.w $t3, $a1, 8
++; LA32-NEXT: mulh.wu $a0, $t2, $t3
++; LA32-NEXT: mul.w $t5, $t4, $t3
++; LA32-NEXT: add.w $a0, $t5, $a0
++; LA32-NEXT: sltu $t5, $a0, $t5
++; LA32-NEXT: mulh.wu $t6, $t4, $t3
++; LA32-NEXT: add.w $t5, $t6, $t5
++; LA32-NEXT: ld.w $t7, $a1, 12
++; LA32-NEXT: mul.w $a1, $t2, $t7
++; LA32-NEXT: add.w $a0, $a1, $a0
++; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill
++; LA32-NEXT: sltu $a1, $a0, $a1
++; LA32-NEXT: mulh.wu $t6, $t2, $t7
++; LA32-NEXT: add.w $t6, $t6, $a1
++; LA32-NEXT: srai.w $s7, $t4, 31
++; LA32-NEXT: mul.w $a1, $s7, $t7
++; LA32-NEXT: mulh.wu $t8, $s7, $t3
++; LA32-NEXT: add.w $t8, $t8, $a1
++; LA32-NEXT: mulh.wu $fp, $a6, $s7
++; LA32-NEXT: mul.w $s6, $t0, $s7
++; LA32-NEXT: add.w $s8, $s6, $fp
++; LA32-NEXT: mul.w $a1, $a6, $s7
++; LA32-NEXT: add.w $ra, $a1, $s8
++; LA32-NEXT: sltu $s0, $ra, $a1
++; LA32-NEXT: add.w $a0, $fp, $s0
++; LA32-NEXT: add.w $a3, $a4, $t1
++; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill
++; LA32-NEXT: sltu $a4, $a3, $a4
++; LA32-NEXT: mulh.wu $t1, $a5, $t0
++; LA32-NEXT: add.w $a3, $t1, $a4
++; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill
++; LA32-NEXT: srai.w $s4, $t7, 31
++; LA32-NEXT: mul.w $fp, $a7, $s4
++; LA32-NEXT: mulh.wu $a4, $a7, $s4
++; LA32-NEXT: add.w $s1, $a4, $fp
++; LA32-NEXT: sltu $s0, $s1, $fp
++; LA32-NEXT: add.w $s5, $a4, $s0
++; LA32-NEXT: mul.w $a4, $s7, $t3
++; LA32-NEXT: add.w $t8, $t8, $a4
++; LA32-NEXT: add.w $s0, $ra, $t8
++; LA32-NEXT: add.w $a3, $a1, $a4
++; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill
++; LA32-NEXT: sltu $a4, $a3, $a1
++; LA32-NEXT: add.w $a3, $s0, $a4
++; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill
++; LA32-NEXT: add.w $s3, $t5, $t6
++; LA32-NEXT: sltu $a4, $s3, $t5
++; LA32-NEXT: mulh.wu $t5, $t4, $t7
++; LA32-NEXT: add.w $a3, $t5, $a4
++; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill
++; LA32-NEXT: mul.w $a4, $a7, $a6
++; LA32-NEXT: st.w $a4, $a2, 0
++; LA32-NEXT: sltu $a4, $s8, $s6
++; LA32-NEXT: mulh.wu $t5, $t0, $s7
++; LA32-NEXT: add.w $a4, $t5, $a4
++; LA32-NEXT: add.w $t1, $a4, $a0
++; LA32-NEXT: sltu $a4, $t1, $a4
++; LA32-NEXT: add.w $s2, $t5, $a4
++; LA32-NEXT: mulh.wu $a4, $a7, $t3
++; LA32-NEXT: mul.w $t5, $a5, $t3
++; LA32-NEXT: add.w $a4, $t5, $a4
++; LA32-NEXT: sltu $t5, $a4, $t5
++; LA32-NEXT: mulh.wu $t6, $a5, $t3
++; LA32-NEXT: add.w $a3, $t6, $t5
++; LA32-NEXT: mul.w $t6, $a7, $t7
++; LA32-NEXT: add.w $t5, $t6, $a4
++; LA32-NEXT: sltu $a4, $t5, $t6
++; LA32-NEXT: mulh.wu $t6, $a7, $t7
++; LA32-NEXT: add.w $a4, $t6, $a4
++; LA32-NEXT: mulh.wu $t6, $t2, $a6
++; LA32-NEXT: mul.w $s7, $t4, $a6
++; LA32-NEXT: add.w $t6, $s7, $t6
++; LA32-NEXT: sltu $s7, $t6, $s7
++; LA32-NEXT: mulh.wu $s8, $t4, $a6
++; LA32-NEXT: add.w $a0, $s8, $s7
++; LA32-NEXT: mul.w $s7, $t2, $t0
++; LA32-NEXT: add.w $t6, $s7, $t6
++; LA32-NEXT: sltu $s7, $t6, $s7
++; LA32-NEXT: mulh.wu $s8, $t2, $t0
++; LA32-NEXT: add.w $a2, $s8, $s7
++; LA32-NEXT: mul.w $s8, $a5, $s4
++; LA32-NEXT: add.w $s7, $s1, $s8
++; LA32-NEXT: add.w $s1, $s7, $ra
++; LA32-NEXT: add.w $a1, $fp, $a1
++; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill
++; LA32-NEXT: sltu $ra, $a1, $fp
++; LA32-NEXT: add.w $a1, $s1, $ra
++; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill
++; LA32-NEXT: xor $s0, $a1, $s7
++; LA32-NEXT: sltui $s0, $s0, 1
++; LA32-NEXT: sltu $a1, $a1, $s7
++; LA32-NEXT: masknez $s1, $a1, $s0
++; LA32-NEXT: maskeqz $s0, $ra, $s0
++; LA32-NEXT: add.w $t1, $s6, $t1
++; LA32-NEXT: sltu $s6, $t1, $s6
++; LA32-NEXT: add.w $s2, $s2, $s6
++; LA32-NEXT: add.w $a2, $a0, $a2
++; LA32-NEXT: sltu $a0, $a2, $a0
++; LA32-NEXT: mulh.wu $s6, $t4, $t0
++; LA32-NEXT: add.w $t8, $s6, $a0
++; LA32-NEXT: add.w $a4, $a3, $a4
++; LA32-NEXT: sltu $a3, $a4, $a3
++; LA32-NEXT: mulh.wu $s6, $a5, $t7
++; LA32-NEXT: add.w $a3, $s6, $a3
++; LA32-NEXT: mul.w $s6, $t4, $t7
++; LA32-NEXT: mul.w $t7, $a5, $t7
++; LA32-NEXT: mul.w $ra, $t4, $t0
++; LA32-NEXT: mul.w $t0, $a5, $t0
++; LA32-NEXT: mul.w $t4, $t4, $s4
++; LA32-NEXT: mul.w $a7, $a7, $t3
++; LA32-NEXT: mul.w $a6, $t2, $a6
++; LA32-NEXT: mul.w $t3, $t2, $t3
++; LA32-NEXT: mul.w $a0, $t2, $s4
++; LA32-NEXT: mulh.wu $t2, $t2, $s4
++; LA32-NEXT: mulh.wu $a5, $s4, $a5
++; LA32-NEXT: sltu $s4, $s7, $s8
++; LA32-NEXT: add.w $s4, $a5, $s4
++; LA32-NEXT: add.w $s4, $s5, $s4
++; LA32-NEXT: sltu $s5, $s4, $s5
++; LA32-NEXT: add.w $s5, $a5, $s5
++; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a1, $t0, $a1
++; LA32-NEXT: sltu $a5, $a1, $t0
++; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload
++; LA32-NEXT: add.w $t0, $t0, $a5
++; LA32-NEXT: or $s0, $s0, $s1
++; LA32-NEXT: add.w $a4, $t7, $a4
++; LA32-NEXT: sltu $a5, $a4, $t7
++; LA32-NEXT: add.w $t7, $a3, $a5
++; LA32-NEXT: add.w $s1, $ra, $a2
++; LA32-NEXT: sltu $a2, $s1, $ra
++; LA32-NEXT: add.w $t8, $t8, $a2
++; LA32-NEXT: add.w $a5, $s6, $s3
++; LA32-NEXT: sltu $a2, $a5, $s6
++; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a2, $a3, $a2
++; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload
++; LA32-NEXT: st.w $a3, $s6, 4
++; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a3, $s2, $a3
++; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload
++; LA32-NEXT: add.w $s2, $t1, $s2
++; LA32-NEXT: sltu $t1, $s2, $t1
++; LA32-NEXT: add.w $a3, $a3, $t1
++; LA32-NEXT: add.w $t1, $s8, $s4
++; LA32-NEXT: sltu $s3, $t1, $s8
++; LA32-NEXT: add.w $s3, $s5, $s3
++; LA32-NEXT: add.w $t2, $t2, $a0
++; LA32-NEXT: add.w $t2, $t2, $t4
++; LA32-NEXT: add.w $t2, $t2, $s7
++; LA32-NEXT: add.w $t4, $a0, $fp
++; LA32-NEXT: sltu $a0, $t4, $a0
++; LA32-NEXT: add.w $a0, $t2, $a0
++; LA32-NEXT: add.w $a0, $s3, $a0
++; LA32-NEXT: add.w $t2, $t1, $t4
++; LA32-NEXT: sltu $t1, $t2, $t1
++; LA32-NEXT: add.w $a0, $a0, $t1
++; LA32-NEXT: add.w $a0, $a0, $a3
++; LA32-NEXT: add.w $t1, $t2, $s2
++; LA32-NEXT: sltu $a3, $t1, $t2
++; LA32-NEXT: add.w $a0, $a0, $a3
++; LA32-NEXT: add.w $a3, $t6, $t0
++; LA32-NEXT: add.w $a1, $a6, $a1
++; LA32-NEXT: sltu $a6, $a1, $a6
++; LA32-NEXT: add.w $t0, $a3, $a6
++; LA32-NEXT: add.w $a1, $a7, $a1
++; LA32-NEXT: sltu $a7, $a1, $a7
++; LA32-NEXT: add.w $a3, $t5, $t0
++; LA32-NEXT: add.w $a3, $a3, $a7
++; LA32-NEXT: sltu $t2, $a3, $t5
++; LA32-NEXT: xor $t4, $a3, $t5
++; LA32-NEXT: sltui $t4, $t4, 1
++; LA32-NEXT: masknez $t2, $t2, $t4
++; LA32-NEXT: maskeqz $a7, $a7, $t4
++; LA32-NEXT: st.w $a1, $s6, 8
++; LA32-NEXT: or $a1, $a7, $t2
++; LA32-NEXT: sltu $a7, $t0, $t6
++; LA32-NEXT: xor $t0, $t0, $t6
++; LA32-NEXT: sltui $t0, $t0, 1
++; LA32-NEXT: masknez $a7, $a7, $t0
++; LA32-NEXT: maskeqz $a6, $a6, $t0
++; LA32-NEXT: or $a6, $a6, $a7
++; LA32-NEXT: add.w $a6, $s1, $a6
++; LA32-NEXT: sltu $a7, $a6, $s1
++; LA32-NEXT: add.w $a7, $t8, $a7
++; LA32-NEXT: add.w $a1, $a4, $a1
++; LA32-NEXT: sltu $a4, $a1, $a4
++; LA32-NEXT: add.w $a4, $t7, $a4
++; LA32-NEXT: add.w $t0, $t1, $s0
++; LA32-NEXT: sltu $t1, $t0, $t1
++; LA32-NEXT: add.w $a0, $a0, $t1
++; LA32-NEXT: st.w $a3, $s6, 12
++; LA32-NEXT: add.w $a1, $a6, $a1
++; LA32-NEXT: sltu $a6, $a1, $a6
++; LA32-NEXT: add.w $a4, $a7, $a4
++; LA32-NEXT: add.w $a4, $a4, $a6
++; LA32-NEXT: sltu $t1, $a4, $a7
++; LA32-NEXT: xor $a7, $a4, $a7
++; LA32-NEXT: sltui $a7, $a7, 1
++; LA32-NEXT: masknez $t1, $t1, $a7
++; LA32-NEXT: maskeqz $a6, $a6, $a7
++; LA32-NEXT: or $a6, $a6, $t1
++; LA32-NEXT: add.w $a6, $a5, $a6
++; LA32-NEXT: sltu $a5, $a6, $a5
++; LA32-NEXT: add.w $a2, $a2, $a5
++; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a4, $t1, $a4
++; LA32-NEXT: add.w $a1, $t3, $a1
++; LA32-NEXT: sltu $a5, $a1, $t3
++; LA32-NEXT: add.w $a4, $a4, $a5
++; LA32-NEXT: sltu $a7, $a4, $t1
++; LA32-NEXT: xor $t1, $a4, $t1
++; LA32-NEXT: sltui $t1, $t1, 1
++; LA32-NEXT: masknez $a7, $a7, $t1
++; LA32-NEXT: maskeqz $a5, $a5, $t1
++; LA32-NEXT: or $a5, $a5, $a7
++; LA32-NEXT: add.w $a5, $a6, $a5
++; LA32-NEXT: sltu $a6, $a5, $a6
++; LA32-NEXT: add.w $a2, $a2, $a6
++; LA32-NEXT: add.w $a0, $a2, $a0
++; LA32-NEXT: add.w $a2, $a5, $t0
++; LA32-NEXT: sltu $a5, $a2, $a5
++; LA32-NEXT: add.w $a0, $a0, $a5
++; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a5, $a1, $a5
++; LA32-NEXT: sltu $a1, $a5, $a1
++; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload
++; LA32-NEXT: add.w $a6, $a4, $a6
++; LA32-NEXT: add.w $a6, $a6, $a1
++; LA32-NEXT: sltu $a7, $a6, $a4
++; LA32-NEXT: xor $a4, $a6, $a4
++; LA32-NEXT: sltui $a4, $a4, 1
++; LA32-NEXT: masknez $a7, $a7, $a4
++; LA32-NEXT: maskeqz $a1, $a1, $a4
++; LA32-NEXT: or $a1, $a1, $a7
++; LA32-NEXT: add.w $a1, $a2, $a1
++; LA32-NEXT: sltu $a2, $a1, $a2
++; LA32-NEXT: add.w $a0, $a0, $a2
++; LA32-NEXT: srai.w $a2, $a3, 31
++; LA32-NEXT: xor $a3, $a6, $a2
++; LA32-NEXT: xor $a0, $a0, $a2
++; LA32-NEXT: or $a0, $a3, $a0
++; LA32-NEXT: xor $a3, $a5, $a2
++; LA32-NEXT: xor $a1, $a1, $a2
++; LA32-NEXT: or $a1, $a3, $a1
++; LA32-NEXT: or $a0, $a1, $a0
+ ; LA32-NEXT: sltu $a0, $zero, $a0
+-; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+-; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+-; LA32-NEXT: addi.w $sp, $sp, 64
++; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload
++; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 96
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: smuloi128:
+ ; LA64: # %bb.0:
+-; LA64-NEXT: addi.d $sp, $sp, -32
+-; LA64-NEXT: .cfi_def_cfa_offset 32
+-; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+-; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+-; LA64-NEXT: .cfi_offset 1, -8
+-; LA64-NEXT: .cfi_offset 22, -16
+-; LA64-NEXT: move $fp, $a4
+-; LA64-NEXT: st.d $zero, $sp, 8
+-; LA64-NEXT: addi.d $a4, $sp, 8
+-; LA64-NEXT: bl %plt(__muloti4)
+-; LA64-NEXT: st.d $a1, $fp, 8
+-; LA64-NEXT: st.d $a0, $fp, 0
+-; LA64-NEXT: ld.d $a0, $sp, 8
++; LA64-NEXT: srai.d $a5, $a1, 63
++; LA64-NEXT: mul.d $a6, $a2, $a5
++; LA64-NEXT: mulh.du $a7, $a2, $a5
++; LA64-NEXT: add.d $a7, $a7, $a6
++; LA64-NEXT: mul.d $a5, $a3, $a5
++; LA64-NEXT: add.d $a5, $a7, $a5
++; LA64-NEXT: srai.d $a7, $a3, 63
++; LA64-NEXT: mul.d $t0, $a7, $a1
++; LA64-NEXT: mulh.du $t1, $a7, $a0
++; LA64-NEXT: add.d $t0, $t1, $t0
++; LA64-NEXT: mul.d $a7, $a7, $a0
++; LA64-NEXT: add.d $t0, $t0, $a7
++; LA64-NEXT: add.d $a5, $t0, $a5
++; LA64-NEXT: mulh.du $t0, $a0, $a2
++; LA64-NEXT: mul.d $t1, $a1, $a2
++; LA64-NEXT: add.d $t0, $t1, $t0
++; LA64-NEXT: sltu $t1, $t0, $t1
++; LA64-NEXT: mulh.du $t2, $a1, $a2
++; LA64-NEXT: add.d $t1, $t2, $t1
++; LA64-NEXT: mul.d $t2, $a0, $a3
++; LA64-NEXT: add.d $t0, $t2, $t0
++; LA64-NEXT: sltu $t2, $t0, $t2
++; LA64-NEXT: mulh.du $t3, $a0, $a3
++; LA64-NEXT: add.d $t2, $t3, $t2
++; LA64-NEXT: add.d $a6, $a7, $a6
++; LA64-NEXT: sltu $a7, $a6, $a7
++; LA64-NEXT: add.d $a5, $a5, $a7
++; LA64-NEXT: mul.d $a0, $a0, $a2
++; LA64-NEXT: mul.d $a2, $a1, $a3
++; LA64-NEXT: mulh.du $a1, $a1, $a3
++; LA64-NEXT: add.d $a3, $t1, $t2
++; LA64-NEXT: sltu $a7, $a3, $t1
++; LA64-NEXT: add.d $a1, $a1, $a7
++; LA64-NEXT: st.d $a0, $a4, 0
++; LA64-NEXT: add.d $a0, $a2, $a3
++; LA64-NEXT: sltu $a2, $a0, $a2
++; LA64-NEXT: add.d $a1, $a1, $a2
++; LA64-NEXT: st.d $t0, $a4, 8
++; LA64-NEXT: add.d $a1, $a1, $a5
++; LA64-NEXT: add.d $a2, $a0, $a6
++; LA64-NEXT: sltu $a0, $a2, $a0
++; LA64-NEXT: add.d $a0, $a1, $a0
++; LA64-NEXT: srai.d $a1, $t0, 63
++; LA64-NEXT: xor $a0, $a0, $a1
++; LA64-NEXT: xor $a1, $a2, $a1
++; LA64-NEXT: or $a0, $a1, $a0
+ ; LA64-NEXT: sltu $a0, $zero, $a0
+-; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+-; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+-; LA64-NEXT: addi.d $sp, $sp, 32
+ ; LA64-NEXT: ret
+ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
+ %val = extractvalue {i128, i1} %t, 0
+--
+2.20.1
+
+
+From 01ced6193e2abfbd50fbd9d40066cf27f9f9067b Mon Sep 17 00:00:00 2001
+From: wanglei <wanglei@loongson.cn>
+Date: Wed, 29 Nov 2023 15:21:21 +0800
+Subject: [PATCH 12/14] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions
+ (#73742)
+
+```
+when a = c = -0.0, b = 0.0:
+-(a * b + (-c)) = -0.0
+(-a) * b + c = +0.0
+(fneg (fma a, b, (-c))) != (fma (fneg a), b, c)
+```
+
+See https://reviews.llvm.org/D90901 for a similar discussion on X86.
+
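+For readers of this backport, the signed-zero asymmetry is easy to reproduce
+on the host. The C++ snippet below is purely illustrative and not part of the
+upstream commit; it evaluates both forms with separate multiply and add steps,
+which is enough to show the difference in the sign of zero:
+
+```cpp
+#include <cstdio>
+
+int main() {
+  double a = -0.0, b = 0.0, c = -0.0;
+  // fnmsub semantics: -(a * b - c), i.e. -(a * b + (-c)).
+  double fnmsub = -(a * b + (-c));
+  // The old pattern instead matched (-a) * b + c.
+  double oldPattern = (-a) * b + c;
+  // Prints "fnmsub = -0, old pattern = 0": the two forms disagree on the
+  // sign of zero, so the old pattern was not a valid rewrite of fnmsub.
+  std::printf("fnmsub = %g, old pattern = %g\n", fnmsub, oldPattern);
+  return 0;
+}
+```
+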
+(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9)
+---
+ .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +-
+ .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +-
+ llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++--
+ llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++--
+ 4 files changed, 483 insertions(+), 49 deletions(-)
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+index 826db54febd3..65120c083f49 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)),
+ def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)),
+ (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+-// fnmsub.s: -fj * fk + fa
+-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
++// fnmsub.s: -(fj * fk - fa)
++def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
++ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
++
++// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
++def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
+ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+ } // Predicates = [HasBasicF]
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+index 5118474725b6..437c1e4d7be2 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)),
+ (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+ // fnmsub.d: -(fj * fk - fa)
+-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
++def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
++ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
++
++// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
++def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
+ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+ } // Predicates = [HasBasicD]
+
+diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll
+index 6dd628479433..58d20c62a668 100644
+--- a/llvm/test/CodeGen/LoongArch/double-fma.ll
++++ b/llvm/test/CodeGen/LoongArch/double-fma.ll
+@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
+ ; LA32-CONTRACT-ON-LABEL: fnmsub_d:
+ ; LA32-CONTRACT-ON: # %bb.0:
+ ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
+-; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2
++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
+ ; LA32-CONTRACT-ON-NEXT: ret
+ ;
+ ; LA32-CONTRACT-OFF-LABEL: fnmsub_d:
+ ; LA32-CONTRACT-OFF: # %bb.0:
+ ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
+-; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2
++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
+ ; LA32-CONTRACT-OFF-NEXT: ret
+ ;
+ ; LA64-CONTRACT-FAST-LABEL: fnmsub_d:
+@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
+ ; LA64-CONTRACT-ON-LABEL: fnmsub_d:
+ ; LA64-CONTRACT-ON: # %bb.0:
+ ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
+-; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2
++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
+ ; LA64-CONTRACT-ON-NEXT: ret
+ ;
+ ; LA64-CONTRACT-OFF-LABEL: fnmsub_d:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2
++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg double %c
++ %mul = fmul double %a, %b
++ %add = fadd double %mul, %negc
++ %neg = fneg double %add
++ ret double %neg
++}
++
++define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA64-CONTRACT-OFF-NEXT: ret
++ %nega = fneg nsz double %a
++ %mul = fmul nsz double %nega, %b
++ %add = fadd nsz double %mul, %c
++ ret double %add
++}
++
++;; Check that fnmsub.d is not emitted.
++define double @not_fnmsub_d(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
+ ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg double %a
+@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind {
+ ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg contract double %c
++ %mul = fmul contract double %a, %b
++ %add = fadd contract double %mul, %negc
++ %neg = fneg contract double %add
++ ret double %neg
++}
++
++define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %nega = fneg contract nsz double %a
++ %mul = fmul contract nsz double %nega, %b
++ %add = fadd contract nsz double %mul, %c
++ ret double %add
++}
++
++;; Check that fnmsub.d is not emitted.
++define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg contract double %a
+ %mul = fmul contract double %nega, %b
+@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind {
+ ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %fma = call double @llvm.fma.f64(double %a, double %b, double %c)
+- %neg = fneg double %fma
+- ret double %neg
++ %negfma = fneg double %fma
++ ret double %negfma
+ }
+
+ define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind {
+@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind {
+ ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg double %c
++ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc)
++ %negfma = fneg double %fma
++ ret double %negfma
++}
++
++define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg double %a
+- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c)
++ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c)
+ ret double %fma
+ }
+
+-define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind {
+-; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics:
++;; Check that fnmsub.d is not emitted.
++define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics:
+ ; LA32-CONTRACT-FAST: # %bb.0:
+-; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-FAST-NEXT: ret
+ ;
+-; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics:
++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics:
+ ; LA32-CONTRACT-ON: # %bb.0:
+-; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-ON-NEXT: ret
+ ;
+-; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics:
++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics:
+ ; LA32-CONTRACT-OFF: # %bb.0:
+-; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-OFF-NEXT: ret
+ ;
+-; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics:
++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics:
+ ; LA64-CONTRACT-FAST: # %bb.0:
+-; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-FAST-NEXT: ret
+ ;
+-; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics:
++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics:
+ ; LA64-CONTRACT-ON: # %bb.0:
+-; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-ON-NEXT: ret
+ ;
+-; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics:
++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+-; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+- %negb = fneg double %b
+- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c)
++ %nega = fneg double %a
++ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c)
+ ret double %fma
+ }
+
+@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %mul = fmul contract double %a, %b
+- %sub = fsub contract double %c, %mul
+- ret double %sub
++ %negc = fneg contract double %c
++ %add = fadd contract double %negc, %mul
++ %negadd = fneg contract double %add
++ ret double %negadd
+ }
+diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll
+index 54dc56784006..c236255d971a 100644
+--- a/llvm/test/CodeGen/LoongArch/float-fma.ll
++++ b/llvm/test/CodeGen/LoongArch/float-fma.ll
+@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
+ ; LA32-CONTRACT-ON-LABEL: fnmsub_s:
+ ; LA32-CONTRACT-ON: # %bb.0:
+ ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
+-; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2
++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
+ ; LA32-CONTRACT-ON-NEXT: ret
+ ;
+ ; LA32-CONTRACT-OFF-LABEL: fnmsub_s:
+ ; LA32-CONTRACT-OFF: # %bb.0:
+ ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
+-; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2
++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
+ ; LA32-CONTRACT-OFF-NEXT: ret
+ ;
+ ; LA64-CONTRACT-FAST-LABEL: fnmsub_s:
+@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
+ ; LA64-CONTRACT-ON-LABEL: fnmsub_s:
+ ; LA64-CONTRACT-ON: # %bb.0:
+ ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
+-; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2
++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
+ ; LA64-CONTRACT-ON-NEXT: ret
+ ;
+ ; LA64-CONTRACT-OFF-LABEL: fnmsub_s:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2
++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg float %c
++ %mul = fmul float %a, %b
++ %add = fadd float %mul, %negc
++ %neg = fneg float %add
++ ret float %neg
++}
++
++define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA64-CONTRACT-OFF-NEXT: ret
++ %nega = fneg nsz float %a
++ %mul = fmul nsz float %nega, %b
++ %add = fadd nsz float %mul, %c
++ ret float %add
++}
++
++;; Check that fnmsub.s is not emitted.
++define float @not_fnmsub_s(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
+ ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg float %a
+@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind {
+ ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg contract float %c
++ %mul = fmul contract float %a, %b
++ %add = fadd contract float %mul, %negc
++ %neg = fneg contract float %add
++ ret float %neg
++}
++
++define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %nega = fneg contract nsz float %a
++ %mul = fmul contract nsz float %nega, %b
++ %add = fadd contract nsz float %mul, %c
++ ret float %add
++}
++
++;; Check that fnmsub.s is not emitted.
++define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg contract float %a
+ %mul = fmul contract float %nega, %b
+@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind {
+ ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %fma = call float @llvm.fma.f64(float %a, float %b, float %c)
+- %neg = fneg float %fma
+- ret float %neg
++ %negfma = fneg float %fma
++ ret float %negfma
+ }
+
+ define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind {
+@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind {
+ ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-OFF-NEXT: ret
++ %negc = fneg float %c
++ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc)
++ %negfma = fneg float %fma
++ ret float %negfma
++}
++
++define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics:
++; LA32-CONTRACT-FAST: # %bb.0:
++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-FAST-NEXT: ret
++;
++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics:
++; LA32-CONTRACT-ON: # %bb.0:
++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-ON-NEXT: ret
++;
++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics:
++; LA32-CONTRACT-OFF: # %bb.0:
++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA32-CONTRACT-OFF-NEXT: ret
++;
++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics:
++; LA64-CONTRACT-FAST: # %bb.0:
++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-FAST-NEXT: ret
++;
++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics:
++; LA64-CONTRACT-ON: # %bb.0:
++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
++; LA64-CONTRACT-ON-NEXT: ret
++;
++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics:
++; LA64-CONTRACT-OFF: # %bb.0:
++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %nega = fneg float %a
+- %fma = call float @llvm.fma.f64(float %nega, float %b, float %c)
++ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c)
+ ret float %fma
+ }
+
+-define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind {
+-; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics:
++;; Check that fnmsub.s is not emitted.
++define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind {
++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics:
+ ; LA32-CONTRACT-FAST: # %bb.0:
+-; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-FAST-NEXT: ret
+ ;
+-; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics:
++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics:
+ ; LA32-CONTRACT-ON: # %bb.0:
+-; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-ON-NEXT: ret
+ ;
+-; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics:
++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics:
+ ; LA32-CONTRACT-OFF: # %bb.0:
+-; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA32-CONTRACT-OFF-NEXT: ret
+ ;
+-; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics:
++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics:
+ ; LA64-CONTRACT-FAST: # %bb.0:
+-; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-FAST-NEXT: ret
+ ;
+-; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics:
++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics:
+ ; LA64-CONTRACT-ON: # %bb.0:
+-; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-ON-NEXT: ret
+ ;
+-; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics:
++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics:
+ ; LA64-CONTRACT-OFF: # %bb.0:
+-; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+- %negb = fneg float %b
+- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c)
++ %nega = fneg float %a
++ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c)
+ ret float %fma
+ }
+
+@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
+ ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
+ ; LA64-CONTRACT-OFF-NEXT: ret
+ %mul = fmul contract float %a, %b
+- %sub = fsub contract float %c, %mul
+- ret float %sub
++ %negc = fneg contract float %c
++ %add = fadd contract float %negc, %mul
++ %negadd = fneg contract float %add
++ ret float %negadd
+ }
+--
+2.20.1
+
+
+From 7a3bd125d9c1d0265b265ce238a88d0d4550e5a0 Mon Sep 17 00:00:00 2001
+From: Weining Lu <luweining@loongson.cn>
+Date: Wed, 3 Jan 2024 13:59:12 +0800
+Subject: [PATCH 13/14] [LoongArch] Fix the processor series mask
+
+Refer to the PRID_SERIES_MASK definition in the Linux kernel:
+arch/loongarch/include/asm/cpu.h.
+
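+As an illustration only (not part of the upstream change), the sketch below
+mirrors the detection logic with the corrected mask. The helper name and the
+example processor id are ours, not LLVM's API:
+
+```cpp
+#include <cstdint>
+#include <cstdio>
+
+// The series is encoded in bits [15:12] of the processor id, so only that
+// nibble should be compared (PRID_SERIES_MASK == 0xf000).
+static const char *seriesToCPUName(uint32_t processor_id) {
+  switch (processor_id & 0xf000) {
+  case 0xc000: // Loongson 64bit, 4-issue
+    return "la464";
+  default:
+    return "generic";
+  }
+}
+
+int main() {
+  // Hypothetical id with non-zero bits [11:8]: the old 0xff00 mask yields
+  // 0xc100 and misses the la464 case, while 0xf000 still matches.
+  std::printf("%s\n", seriesToCPUName(0x0000c1a0));
+  return 0;
+}
+```
+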
+(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0)
+---
+ llvm/lib/TargetParser/Host.cpp | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
+index 81309280a44b..d11dc605e188 100644
+--- a/llvm/lib/TargetParser/Host.cpp
++++ b/llvm/lib/TargetParser/Host.cpp
+@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() {
+ // Use processor id to detect cpu name.
+ uint32_t processor_id;
+ __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
+- switch (processor_id & 0xff00) {
++ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h.
++ switch (processor_id & 0xf000) {
+ case 0xc000: // Loongson 64bit, 4-issue
+ return "la464";
+ // TODO: Others.
+--
+2.20.1
+
+
+From 3634ac4cbc475509c46521f5b8a3fcbeca6d06c7 Mon Sep 17 00:00:00 2001
+From: wanglei <wanglei@loongson.cn>
+Date: Mon, 11 Mar 2024 08:59:17 +0800
+Subject: [PATCH 14/14] [LoongArch] Make sure that the LoongArchISD::BSTRINS
+ node uses the correct `MSB` value (#84454)
+
+The `MSB` must not be greater than `GRLen`. Without this patch, the newly
+added test cases cause a crash when targeting LoongArch32, resulting in a
+'cannot select' error.
+
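+As an illustration only, the sketch below shows how the clamp keeps the `MSB`
+operand encodable on LA32. It assumes the over-long `MaskLen0` comes from
+viewing the inverted 32-bit mask as a sign-extended 64-bit shifted mask; the
+numbers mirror the new `pat5_high_zeros` test:
+
+```cpp
+#include <cassert>
+#include <cstdio>
+
+int main() {
+  unsigned ValBits = 32;  // GRLen on LA32
+  unsigned MaskIdx0 = 16; // low bit of the inserted field
+  unsigned MaskLen0 = 48; // assumed over-long length from the 64-bit view
+  // Old computation: 16 + 48 - 1 = 63, which bstrins.w cannot encode.
+  unsigned OldMSB = MaskIdx0 + MaskLen0 - 1;
+  // Patched computation: the length is reduced modulo 32 first.
+  unsigned MSB = ValBits == 32 ? MaskIdx0 + (MaskLen0 & 31) - 1
+                               : MaskIdx0 + MaskLen0 - 1;
+  assert(MSB < ValBits && "BSTRINS msb must stay below GRLen");
+  // Prints "bstrins.w $a0, $a1, 31, 16 (old msb: 63)".
+  std::printf("bstrins.w $a0, $a1, %u, %u (old msb: %u)\n", MSB, MaskIdx0,
+              OldMSB);
+  return 0;
+}
+```
+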
+(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b)
+(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626)
+---
+ llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++-
+ llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+index ed106cb766bc..5affaf37ad5a 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+@@ -2310,7 +2310,9 @@ Retry:
+ return DAG.getNode(
+ LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
+- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
++ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
++ : (MaskIdx0 + MaskLen0 - 1),
++ DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
+index dfbe000841cd..e008caacad2a 100644
+--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll
++++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
+@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind {
+ ret i32 %or
+ }
+
++;; The high bits of `const` are zero.
++define i32 @pat5_high_zeros(i32 %a) nounwind {
++; CHECK-LABEL: pat5_high_zeros:
++; CHECK: # %bb.0:
++; CHECK-NEXT: lu12i.w $a1, 1
++; CHECK-NEXT: ori $a1, $a1, 564
++; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16
++; CHECK-NEXT: ret
++ %and = and i32 %a, 65535 ; 0x0000ffff
++ %or = or i32 %and, 305397760 ; 0x12340000
++ ret i32 %or
++}
++
+ ;; Pattern 6: a = b | ((c & mask) << shamt)
+ ;; In this testcase b is 0x10000002, but in fact we do not require b being a
+ ;; constant. As long as all positions in b to be overwritten by the incoming
+--
+2.20.1
+