From fc3df9d7d0ac60faa3bd55068b68a1711f467f3f Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Sun, 18 Aug 2024 17:52:37 +0000 Subject: automatic import of llvm --- ...ve-the-support-for-compiler-rt-and-bugfix.patch | 2474 ++++++++++++++++++++ 1 file changed, 2474 insertions(+) create mode 100644 0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch (limited to '0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch') diff --git a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch new file mode 100644 index 0000000..e40be81 --- /dev/null +++ b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch @@ -0,0 +1,2474 @@ +From 0bce68310dc0ff6a09ec2cf5c3ae32400c631324 Mon Sep 17 00:00:00 2001 +From: zhanglimin +Date: Tue, 12 Sep 2023 09:51:16 +0800 +Subject: [PATCH 01/14] [sanitizer][msan] VarArgHelper for loongarch64 + +This patch adds support for variadic argument for loongarch64, +which is based on MIPS64. And `check-msan` all pass. + +Reviewed By: vitalybuka + +Differential Revision: https://reviews.llvm.org/D158587 + +(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3) +--- + .../Instrumentation/MemorySanitizer.cpp | 7 ++ + .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++ + 2 files changed, 85 insertions(+) + create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll + +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 83d90049abc3..362fd6e4151f 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper { + }; + + /// MIPS64-specific implementation of VarArgHelper. ++/// NOTE: This is also used for LoongArch64. + struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; + MemorySanitizer &MS; +@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper { + } + }; + ++// Loongarch64 is not a MIPS, but the current vargs calling convention matches ++// the MIPS. ++using VarArgLoongArch64Helper = VarArgMIPS64Helper; ++ + /// A no-op implementation of VarArgHelper. + struct VarArgNoOpHelper : public VarArgHelper { + VarArgNoOpHelper(Function &F, MemorySanitizer &MS, +@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.isLoongArch64()) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +new file mode 100644 +index 000000000000..8a4ab59588ad +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +@@ -0,0 +1,78 @@ ++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++;; First, check allocation of the save area. 
++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 ++declare void @llvm.va_start(ptr) #2 ++declare void @llvm.va_end(ptr) #2 ++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 ++define i32 @foo(i32 %guard, ...) { ++; CHECK-LABEL: @foo ++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls ++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] ++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] ++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) ++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) ++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) ++; ++ %vl = alloca ptr, align 8 ++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) ++ call void @llvm.va_start(ptr %vl) ++ call void @llvm.va_end(ptr %vl) ++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) ++ ret i32 0 ++} ++ ++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ++;; array. ++define i32 @bar() { ++; CHECK-LABEL: @bar ++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 ++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Check multiple fixed arguments. ++declare i32 @foo2(i32 %g1, i32 %g2, ...) ++define i32 @bar2() { ++; CHECK-LABEL: @bar2 ++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ++;; passed to a variadic function. ++declare i64 @sum(i64 %n, ...) ++define dso_local i64 @many_args() { ++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. ++; CHECK-LABEL: @many_args ++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) ++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) ++; ++entry: ++ %ret = call i64 (i64, ...) 
@sum(i64 120, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 ++ ) ++ ret i64 %ret ++} +-- +2.20.1 + + +From f1265a12fa947b79967552ab520f904486c76353 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:26:18 +0800 +Subject: [PATCH 02/14] [LowerTypeTests] Add loongarch64 to CFI jumptables + (#67312) + +This patch implements jump tables for loongarch64. + +(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18) +--- + llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++- + llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++ + llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++ + 3 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +index 9b4b3efd7283..a89d57d12615 100644 +--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp ++++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4; + static const unsigned kARMBTIJumpTableEntrySize = 8; + static const unsigned kARMv6MJumpTableEntrySize = 16; + static const unsigned kRISCVJumpTableEntrySize = 8; ++static const unsigned kLOONGARCH64JumpTableEntrySize = 8; + + unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + switch (JumpTableArch) { +@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + case Triple::riscv32: + case Triple::riscv64: + return kRISCVJumpTableEntrySize; ++ case Triple::loongarch64: ++ return kLOONGARCH64JumpTableEntrySize; + default: + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry( + } else if (JumpTableArch == Triple::riscv32 || + JumpTableArch == Triple::riscv64) { + AsmOS << "tail $" << ArgIndex << "@plt\n"; ++ } else if (JumpTableArch == Triple::loongarch64) { ++ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" ++ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; + } else { + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions( + ArrayRef TypeIds, ArrayRef Functions) { + if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || + Arch == Triple::thumb || Arch == Triple::aarch64 || +- Arch == Triple::riscv32 || Arch == Triple::riscv64) ++ Arch == Triple::riscv32 || Arch == Triple::riscv64 || ++ Arch == Triple::loongarch64) + buildBitSetsFromFunctionsNative(TypeIds, Functions); + else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) + buildBitSetsFromFunctionsWASM(TypeIds, Functions); +diff --git 
a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +index ff69abacc8e9..c765937f1991 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +@@ -4,6 +4,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s + + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" +@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) { + ; X86: define private void @[[JT]]() #{{.*}} align 8 { + ; ARM: define private void @[[JT]]() #{{.*}} align 4 { + ; RISCV: define private void @[[JT]]() #{{.*}} align 8 { ++; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 { + + ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" { + ; CHECK-NEXT: entry: +diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll +index 968c9d434eb2..802b88d92977 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function.ll +@@ -5,6 +5,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s + + ; The right format for Arm jump tables depends on the selected + ; subtarget, so we can't get these tests right without the Arm target +@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64" + ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1) + ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1) + ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) ++; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) + + ; NATIVE: define hidden void @f.cfi() + ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]] +@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) { + ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 { + ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 { + ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 { ++; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { + + ; X86: jmp ${0:c}@plt + ; X86-SAME: int3 +@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) { + ; RISCV: tail $0@plt + ; RISCV-SAME: tail $1@plt + ++; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0) ++; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1) ++ + ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi) + + ; X86-LINUX: attributes #[[ATTR]] = { naked 
nocf_check nounwind } +@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) { + ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" } + ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" } + ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" } ++; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind } + + ; WASM32: ![[I0]] = !{i64 1} + ; WASM32: ![[I1]] = !{i64 2} +-- +2.20.1 + + +From 6f3143e1ad0bb759b7519af81994ed3c71dcf52b Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 20 Oct 2023 10:44:55 +0800 +Subject: [PATCH 03/14] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR + +The immediate argument should be a target constant (`timm`). + +(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef) +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index b2c4bb812ba5..166379d7d592 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; + /// Intrinsics + + def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; + def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; + def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; +@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), + def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), + (ASRTGT_D GPR:$rj, GPR:$rk)>; + def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), +- (LDDIR GPR:$rj, uimm8:$imm8)>; ++ (LDDIR GPR:$rj, timm:$imm8)>; + def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), +- (LDPTE GPR:$rj, uimm8:$imm8)>; ++ (LDPTE GPR:$rj, timm:$imm8)>; + } // Predicates = [IsLA64] + + //===----------------------------------------------------------------------===// +-- +2.20.1 + + +From d90b85e94180543fd1789f9e26d7931f2329069b Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Fri, 10 Nov 2023 15:54:33 +0800 +Subject: [PATCH 04/14] [LoongArch][MC] Refine MCInstrAnalysis based on + registers used (#71276) + +MCInstrAnalysis can return properties of instructions (e.g., isCall(), +isBranch(),...) based on the informations that MCInstrDesc can get from +*InstrInfo*.td files. These infos are based on opcodes only, but JIRL +can have different properties based on different registers used. + +So this patch refines several MCInstrAnalysis methods: isTerminator, +isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch. + +This patch also allows BOLT which will be supported on LoongArch later +to get right instruction infos. 
+ +(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1) +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++ + .../unittests/Target/LoongArch/CMakeLists.txt | 1 + + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++ + 3 files changed, 184 insertions(+) + create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index 942e667bc261..d580c3457fec 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -104,6 +104,82 @@ public: + + return false; + } ++ ++ bool isTerminator(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isTerminator(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0; ++ } ++ } ++ ++ bool isCall(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isCall(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() != LoongArch::R0; ++ } ++ } ++ ++ bool isReturn(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isReturn(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() == LoongArch::R1; ++ } ++ } ++ ++ bool isBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } ++ ++ bool isUnconditionalBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isUnconditionalBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } ++ ++ bool isIndirectBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isIndirectBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } + }; + + } // end namespace +diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt +index fef4f8e15461..e6f8ec073721 100644 +--- a/llvm/unittests/Target/LoongArch/CMakeLists.txt ++++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt +@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS + + add_llvm_target_unittest(LoongArchTests + InstSizes.cpp ++ MCInstrAnalysisTest.cpp + ) + + set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +new file mode 100644 +index 000000000000..6a208d274a0d +--- /dev/null ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -0,0 +1,107 @@ ++//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// ++// ++// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/TargetSelect.h" ++ ++#include "gtest/gtest.h" ++ ++#include ++ ++using namespace llvm; ++ ++namespace { ++ ++class InstrAnalysisTest : public testing::TestWithParam { ++protected: ++ std::unique_ptr Info; ++ std::unique_ptr Analysis; ++ ++ static void SetUpTestSuite() { ++ LLVMInitializeLoongArchTargetInfo(); ++ LLVMInitializeLoongArchTarget(); ++ LLVMInitializeLoongArchTargetMC(); ++ } ++ ++ InstrAnalysisTest() { ++ std::string Error; ++ const Target *TheTarget = ++ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); ++ Info = std::unique_ptr(TheTarget->createMCInstrInfo()); ++ Analysis = std::unique_ptr( ++ TheTarget->createMCInstrAnalysis(Info.get())); ++ } ++}; ++ ++} // namespace ++ ++static MCInst beq() { ++ return MCInstBuilder(LoongArch::BEQ) ++ .addReg(LoongArch::R0) ++ .addReg(LoongArch::R1) ++ .addImm(32); ++} ++ ++static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } ++ ++static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { ++ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); ++} ++ ++TEST_P(InstrAnalysisTest, IsTerminator) { ++ EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_FALSE(Analysis->isTerminator(bl())); ++ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); ++} ++ ++TEST_P(InstrAnalysisTest, IsCall) { ++ EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_TRUE(Analysis->isCall(bl())); ++ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); ++} ++ ++TEST_P(InstrAnalysisTest, IsReturn) { ++ EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(bl())); ++ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsBranch) { ++ EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_FALSE(Analysis->isBranch(bl())); ++ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE( ++ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsIndirectBranch) { ++ EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(bl())); ++ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, ++ testing::Values("loongarch32", "loongarch64")); +-- +2.20.1 + + 
+From 4d3ba0892d66b21f6a8a72f1d787e42a64be8867 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Wed, 15 Nov 2023 11:12:30 +0800 +Subject: [PATCH 05/14] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for + instruction 'b' (#71903) + +The tests for 'b' which commented with FIXME are incorrect, the +following patch will fix it. + +(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4) +--- + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6a208d274a0d..6e1919fc2261 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -50,6 +50,8 @@ static MCInst beq() { + .addImm(32); + } + ++static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } ++ + static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } + + static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { +@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { + + TEST_P(InstrAnalysisTest, IsTerminator) { + EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_TRUE(Analysis->isTerminator(b())); + EXPECT_FALSE(Analysis->isTerminator(bl())); + EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); +@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) { + + TEST_P(InstrAnalysisTest, IsCall) { + EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_FALSE(Analysis->isCall(b())); + EXPECT_TRUE(Analysis->isCall(bl())); + EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); +@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) { + + TEST_P(InstrAnalysisTest, IsReturn) { + EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(b())); + EXPECT_FALSE(Analysis->isReturn(bl())); + EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); +@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) { + + TEST_P(InstrAnalysisTest, IsBranch) { + EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_TRUE(Analysis->isBranch(b())); + EXPECT_FALSE(Analysis->isBranch(bl())); + EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); + } + ++TEST_P(InstrAnalysisTest, IsConditionalBranch) { ++ EXPECT_TRUE(Analysis->isConditionalBranch(beq())); ++ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. ++ EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(bl())); ++} ++ + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. 
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + + TEST_P(InstrAnalysisTest, IsIndirectBranch) { + EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(b())); + EXPECT_FALSE(Analysis->isIndirectBranch(bl())); + EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); +-- +2.20.1 + + +From 034d4087be71c54248fff1bf7eae66291671776a Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 16 Nov 2023 14:01:58 +0800 +Subject: [PATCH 06/14] [LoongArch] Set isBarrier to true for instruction 'b' + (#72339) + +Instr "b offs26" represent to an unconditional branch in LoongArch. Set +isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return +correctly. + +Fixes https://github.com/llvm/llvm-project/pull/71903. + +(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f) +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 + + llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 166379d7d592..05ae36a9781d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -586,6 +586,7 @@ class Br_I26 op> + : FmtI26 { + let isBranch = 1; + let isTerminator = 1; ++ let isBarrier = 1; + } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6e1919fc2261..468ee79615d6 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) { + + TEST_P(InstrAnalysisTest, IsConditionalBranch) { + EXPECT_TRUE(Analysis->isConditionalBranch(beq())); +- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. +- EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(b())); + EXPECT_FALSE(Analysis->isConditionalBranch(bl())); + } + + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); +- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. +- EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +-- +2.20.1 + + +From 701109dc419b8d07cd5254268d848dee1278b9ad Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 08:34:52 +0800 +Subject: [PATCH 07/14] [LoongArch][MC] Pre-commit tests for instr bl fixupkind + testing (#72826) + +This patch is used to test whether fixupkind for bl can be returned +correctly. When BL has target-flags(loongarch-call), there is no error. +But without this flag, an assertion error will appear. 
So the test is +just tagged as "Expectedly Failed" now until the following patch fix it. + +(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a) +--- + .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++ + 1 file changed, 66 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir + +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +new file mode 100644 +index 000000000000..2c1d41be7711 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -0,0 +1,66 @@ ++## Tagged as "Expectedly Failed" until the following patch fix it ++# XFAIL: * ++# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ ++# RUN: llvm-objdump -d - | FileCheck %s ++ ++# REQUIRES: asserts ++ ++## Check that bl can get fixupkind correctly. ++## When BL has target-flags(loongarch-call), there is no error. But without ++## this flag, an assertion error will appear: ++## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++ ++--- | ++ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++ target triple = "loongarch64" ++ ++ define dso_local void @test_bl_fixupkind_with_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_with_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ define dso_local void @test_bl_fixupkind_without_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_without_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ declare dso_local void @foo(...) ++... ++--- ++name: test_bl_fixupkind_with_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... ++--- ++name: test_bl_fixupkind_without_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... +-- +2.20.1 + + +From a5bf03107b8738b0fab521d7718bed863056134b Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 19:00:29 +0800 +Subject: [PATCH 08/14] [LoongArch][MC] Support to get the FixupKind for BL + (#72938) + +Previously, bolt could not get FixupKind for BL correctly, because bolt +cannot get target-flags for BL. Here just add support in MCCodeEmitter. + +Fixes https://github.com/llvm/llvm-project/pull/72826. 
+ +(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9) +--- + .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 + + llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 08c0820cb862..09d92ac9aa3a 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + FixupKind = LoongArch::fixup_loongarch_b21; + break; + case LoongArch::B: ++ case LoongArch::BL: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + } +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +index 2c1d41be7711..70cd5fb8d7eb 100644 +--- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -1,14 +1,10 @@ +-## Tagged as "Expectedly Failed" until the following patch fix it +-# XFAIL: * + # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ + # RUN: llvm-objdump -d - | FileCheck %s + + # REQUIRES: asserts + +-## Check that bl can get fixupkind correctly. +-## When BL has target-flags(loongarch-call), there is no error. But without +-## this flag, an assertion error will appear: +-## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++## Check that bl can get fixupkind correctly, whether BL contains ++## target-flags(loongarch-call) or not. + + --- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +-- +2.20.1 + + +From 20421e57af53d963a95c6c318f71f9399d241188 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 23 Nov 2023 16:38:41 +0800 +Subject: [PATCH 09/14] [LoongArch][MC] Modify branch evaluation for + MCInstrAnalysis (#73205) + +Function evaluateBranch() is used to compute target address for a given +branch instruction and return true on success. But target address of +indirect branch cannot be simply added, so rule it out and just return +false. + +This patch also add objdump tests which capture the current state of +support for printing branch targets. Without this patch, the result of +"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 ". It is obviously +incorrect, because this instruction represents an indirect branch whose +target address depends on both the register value and the imm. After +this patch, it will be right despite loss of details. 
+ +(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c) +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +- + .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++ + .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 + + 3 files changed, 80 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index d580c3457fec..a4e6a09863e6 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -97,7 +97,8 @@ public: + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); +- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { ++ if ((isBranch(Inst) && !isIndirectBranch(Inst)) || ++ Inst.getOpcode() == LoongArch::BL) { + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; + } +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +new file mode 100644 +index 000000000000..8cb00aef9954 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +@@ -0,0 +1,76 @@ ++# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++ ++# CHECK-LABEL: : ++foo: ++# CHECK: beq $a0, $a1, 108 ++beq $a0, $a1, .Llocal ++# CHECK: bne $a0, $a1, 104 ++bne $a0, $a1, .Llocal ++# CHECK: blt $a0, $a1, 100 ++blt $a0, $a1, .Llocal ++# CHECK: bltu $a0, $a1, 96 ++bltu $a0, $a1, .Llocal ++# CHECK: bge $a0, $a1, 92 ++bge $a0, $a1, .Llocal ++# CHECK: bgeu $a0, $a1, 88 ++bgeu $a0, $a1, .Llocal ++# CHECK: beqz $a0, 84 ++beqz $a0, .Llocal ++# CHECK: bnez $a0, 80 ++bnez $a0, .Llocal ++# CHECK: bceqz $fcc6, 76 ++bceqz $fcc6, .Llocal ++# CHECK: bcnez $fcc6, 72 ++bcnez $fcc6, .Llocal ++ ++# CHECK: beq $a0, $a1, 76 ++beq $a0, $a1, bar ++# CHECK: bne $a0, $a1, 72 ++bne $a0, $a1, bar ++# CHECK: blt $a0, $a1, 68 ++blt $a0, $a1, bar ++# CHECK: bltu $a0, $a1, 64 ++bltu $a0, $a1, bar ++# CHECK: bge $a0, $a1, 60 ++bge $a0, $a1, bar ++# CHECK: bgeu $a0, $a1, 56 ++bgeu $a0, $a1, bar ++# CHECK: beqz $a0, 52 ++beqz $a0, bar ++# CHECK: bnez $a0, 48 ++bnez $a0, bar ++# CHECK: bceqz $fcc6, 44 ++bceqz $fcc6, bar ++# CHECK: bcnez $fcc6, 40 ++bcnez $fcc6, bar ++ ++# CHECK: b 28 ++b .Llocal ++# CHECK: b 32 ++b bar ++ ++# CHECK: bl 20 ++bl .Llocal ++# CHECK: bl 24 ++bl bar ++ ++# CHECK: jirl $zero, $a0, 4{{$}} ++jirl $zero, $a0, 4 ++# CHECK: jirl $ra, $a0, 4{{$}} ++jirl $ra, $a0, 4 ++# CHECK: ret ++ret ++ ++.Llocal: ++# CHECK: 6c: nop ++# CHECK: nop ++nop ++nop ++ ++# CHECK-LABEL: : ++bar: ++# CHECK: 74: nop ++nop +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..cc24278acbb4 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if not "LoongArch" in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + + +From 0fe85205a8637c6671f423cddd41b712085232ac Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 
2023 15:15:26 +0800 +Subject: [PATCH 10/14] [LoongArch] Precommit a test for smul with overflow + (NFC) (#73212) + +(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71) +--- + .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++ + 1 file changed, 118 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll + +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +new file mode 100644 +index 000000000000..a53e77e5aa4b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -0,0 +1,118 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { ++; LA32-LABEL: smuloi64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a4 ++; LA32-NEXT: st.w $zero, $sp, 4 ++; LA32-NEXT: addi.w $a4, $sp, 4 ++; LA32-NEXT: bl %plt(__mulodi4) ++; LA32-NEXT: st.w $a1, $fp, 4 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: sltu $a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi64: ++; LA64: # %bb.0: ++; LA64-NEXT: mul.d $a3, $a0, $a1 ++; LA64-NEXT: st.d $a3, $a2, 0 ++; LA64-NEXT: mulh.d $a0, $a0, $a1 ++; LA64-NEXT: srai.d $a1, $a3, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ret ++ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) ++ %val = extractvalue {i64, i1} %t, 0 ++ %obit = extractvalue {i64, i1} %t, 1 ++ store i64 %val, ptr %res ++ ret i1 %obit ++} ++ ++define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ++; LA32-LABEL: smuloi128: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -64 ++; LA32-NEXT: .cfi_def_cfa_offset 64 ++; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a2 ++; LA32-NEXT: st.w $zero, $sp, 52 ++; LA32-NEXT: ld.w $a2, $a1, 12 ++; LA32-NEXT: st.w $a2, $sp, 12 ++; LA32-NEXT: ld.w $a2, $a1, 8 ++; LA32-NEXT: st.w $a2, $sp, 8 ++; LA32-NEXT: ld.w $a2, $a1, 4 ++; LA32-NEXT: st.w $a2, $sp, 4 ++; LA32-NEXT: ld.w $a1, $a1, 0 ++; LA32-NEXT: st.w $a1, $sp, 0 ++; LA32-NEXT: ld.w $a1, $a0, 12 ++; LA32-NEXT: st.w $a1, $sp, 28 ++; LA32-NEXT: ld.w $a1, $a0, 8 ++; LA32-NEXT: st.w $a1, $sp, 24 ++; LA32-NEXT: ld.w $a1, $a0, 4 ++; LA32-NEXT: st.w $a1, $sp, 20 ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: st.w $a0, $sp, 16 ++; LA32-NEXT: addi.w $a0, $sp, 32 ++; LA32-NEXT: addi.w $a1, $sp, 16 ++; LA32-NEXT: addi.w $a2, $sp, 0 ++; LA32-NEXT: addi.w $a3, $sp, 52 ++; LA32-NEXT: bl %plt(__muloti4) ++; LA32-NEXT: ld.w $a0, $sp, 44 ++; LA32-NEXT: st.w $a0, $fp, 12 ++; LA32-NEXT: ld.w $a0, $sp, 40 ++; LA32-NEXT: st.w $a0, $fp, 8 ++; LA32-NEXT: ld.w $a0, $sp, 36 ++; LA32-NEXT: st.w $a0, $fp, 4 ++; LA32-NEXT: ld.w $a0, $sp, 32 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: sltu 
$a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi128: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.d $sp, $sp, -32 ++; LA64-NEXT: .cfi_def_cfa_offset 32 ++; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ++; LA64-NEXT: .cfi_offset 1, -8 ++; LA64-NEXT: .cfi_offset 22, -16 ++; LA64-NEXT: move $fp, $a4 ++; LA64-NEXT: st.d $zero, $sp, 8 ++; LA64-NEXT: addi.d $a4, $sp, 8 ++; LA64-NEXT: bl %plt(__muloti4) ++; LA64-NEXT: st.d $a1, $fp, 8 ++; LA64-NEXT: st.d $a0, $fp, 0 ++; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ++; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; LA64-NEXT: addi.d $sp, $sp, 32 ++; LA64-NEXT: ret ++ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) ++ %val = extractvalue {i128, i1} %t, 0 ++ %obit = extractvalue {i128, i1} %t, 1 ++ store i128 %val, ptr %res ++ ret i1 %obit ++} ++ ++declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone ++declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone +-- +2.20.1 + + +From e29ff285726046ec46c9005c67ba992e3efc8ace Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 2023 19:34:50 +0800 +Subject: [PATCH 11/14] [LoongArch] Disable mulodi4 and muloti4 libcalls + (#73199) + +This library function only exists in compiler-rt not libgcc. So this +would fail to link unless we were linking with compiler-rt. + +Fixes https://github.com/ClangBuiltLinux/linux/issues/1958 + +(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e) +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++--- + 2 files changed, 397 insertions(+), 71 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index f7eacd56c542..ed106cb766bc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // Set libcalls. + setLibcallName(RTLIB::MUL_I128, nullptr); ++ // The MULO libcall is not part of libgcc, only compiler-rt. ++ setLibcallName(RTLIB::MULO_I64, nullptr); + } + ++ // The MULO libcall is not part of libgcc, only compiler-rt. 
++ setLibcallName(RTLIB::MULO_I128, nullptr); ++ + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +index a53e77e5aa4b..6cba4108d63c 100644 +--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -5,23 +5,53 @@ + define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + ; LA32-LABEL: smuloi64: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a4 +-; LA32-NEXT: st.w $zero, $sp, 4 +-; LA32-NEXT: addi.w $a4, $sp, 4 +-; LA32-NEXT: bl %plt(__mulodi4) +-; LA32-NEXT: st.w $a1, $fp, 4 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: srai.w $a5, $a1, 31 ++; LA32-NEXT: mul.w $a6, $a2, $a5 ++; LA32-NEXT: mulh.wu $a7, $a2, $a5 ++; LA32-NEXT: add.w $a7, $a7, $a6 ++; LA32-NEXT: mul.w $a5, $a3, $a5 ++; LA32-NEXT: add.w $a5, $a7, $a5 ++; LA32-NEXT: srai.w $a7, $a3, 31 ++; LA32-NEXT: mul.w $t0, $a7, $a1 ++; LA32-NEXT: mulh.wu $t1, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: mul.w $a7, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t0, $a7 ++; LA32-NEXT: add.w $a5, $t0, $a5 ++; LA32-NEXT: mulh.wu $t0, $a0, $a2 ++; LA32-NEXT: mul.w $t1, $a1, $a2 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: mulh.wu $t2, $a1, $a2 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: mul.w $t2, $a0, $a3 ++; LA32-NEXT: add.w $t0, $t2, $t0 ++; LA32-NEXT: sltu $t2, $t0, $t2 ++; LA32-NEXT: mulh.wu $t3, $a0, $a3 ++; LA32-NEXT: add.w $t2, $t3, $t2 ++; LA32-NEXT: add.w $a6, $a7, $a6 ++; LA32-NEXT: sltu $a7, $a6, $a7 ++; LA32-NEXT: add.w $a5, $a5, $a7 ++; LA32-NEXT: mul.w $a0, $a0, $a2 ++; LA32-NEXT: mul.w $a2, $a1, $a3 ++; LA32-NEXT: mulh.wu $a1, $a1, $a3 ++; LA32-NEXT: add.w $a3, $t1, $t2 ++; LA32-NEXT: sltu $a7, $a3, $t1 ++; LA32-NEXT: add.w $a1, $a1, $a7 ++; LA32-NEXT: st.w $a0, $a4, 0 ++; LA32-NEXT: add.w $a0, $a2, $a3 ++; LA32-NEXT: sltu $a2, $a0, $a2 ++; LA32-NEXT: add.w $a1, $a1, $a2 ++; LA32-NEXT: st.w $t0, $a4, 4 ++; LA32-NEXT: add.w $a1, $a1, $a5 ++; LA32-NEXT: add.w $a2, $a0, $a6 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: srai.w $a1, $t0, 31 ++; LA32-NEXT: xor $a0, $a0, $a1 ++; LA32-NEXT: xor $a1, $a2, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi64: +@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { + ; LA32-LABEL: smuloi128: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -64 +-; LA32-NEXT: .cfi_def_cfa_offset 64 +-; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: addi.w $sp, $sp, -96 ++; LA32-NEXT: .cfi_def_cfa_offset 96 ++; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte 
Folded Spill ++; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill + ; LA32-NEXT: .cfi_offset 1, -4 + ; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a2 +-; LA32-NEXT: st.w $zero, $sp, 52 +-; LA32-NEXT: ld.w $a2, $a1, 12 +-; LA32-NEXT: st.w $a2, $sp, 12 +-; LA32-NEXT: ld.w $a2, $a1, 8 +-; LA32-NEXT: st.w $a2, $sp, 8 +-; LA32-NEXT: ld.w $a2, $a1, 4 +-; LA32-NEXT: st.w $a2, $sp, 4 +-; LA32-NEXT: ld.w $a1, $a1, 0 +-; LA32-NEXT: st.w $a1, $sp, 0 +-; LA32-NEXT: ld.w $a1, $a0, 12 +-; LA32-NEXT: st.w $a1, $sp, 28 +-; LA32-NEXT: ld.w $a1, $a0, 8 +-; LA32-NEXT: st.w $a1, $sp, 24 +-; LA32-NEXT: ld.w $a1, $a0, 4 +-; LA32-NEXT: st.w $a1, $sp, 20 +-; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: st.w $a0, $sp, 16 +-; LA32-NEXT: addi.w $a0, $sp, 32 +-; LA32-NEXT: addi.w $a1, $sp, 16 +-; LA32-NEXT: addi.w $a2, $sp, 0 +-; LA32-NEXT: addi.w $a3, $sp, 52 +-; LA32-NEXT: bl %plt(__muloti4) +-; LA32-NEXT: ld.w $a0, $sp, 44 +-; LA32-NEXT: st.w $a0, $fp, 12 +-; LA32-NEXT: ld.w $a0, $sp, 40 +-; LA32-NEXT: st.w $a0, $fp, 8 +-; LA32-NEXT: ld.w $a0, $sp, 36 +-; LA32-NEXT: st.w $a0, $fp, 4 +-; LA32-NEXT: ld.w $a0, $sp, 32 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: .cfi_offset 23, -12 ++; LA32-NEXT: .cfi_offset 24, -16 ++; LA32-NEXT: .cfi_offset 25, -20 ++; LA32-NEXT: .cfi_offset 26, -24 ++; LA32-NEXT: .cfi_offset 27, -28 ++; LA32-NEXT: .cfi_offset 28, -32 ++; LA32-NEXT: .cfi_offset 29, -36 ++; LA32-NEXT: .cfi_offset 30, -40 ++; LA32-NEXT: .cfi_offset 31, -44 ++; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ld.w $a6, $a1, 0 ++; LA32-NEXT: ld.w $a7, $a0, 0 ++; LA32-NEXT: mulh.wu $a3, $a7, $a6 ++; LA32-NEXT: ld.w $a5, $a0, 4 ++; LA32-NEXT: mul.w $a4, $a5, $a6 ++; LA32-NEXT: add.w $a3, $a4, $a3 ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; LA32-NEXT: mulh.wu $t0, $a5, $a6 ++; LA32-NEXT: add.w $a4, $t0, $a4 ++; LA32-NEXT: ld.w $t0, $a1, 4 ++; LA32-NEXT: mul.w $t1, $a7, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a3 ++; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill ++; LA32-NEXT: sltu $t1, $a3, $t1 ++; LA32-NEXT: mulh.wu $t2, $a7, $t0 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: ld.w $t4, $a0, 12 ++; LA32-NEXT: ld.w $t2, $a0, 8 ++; LA32-NEXT: ld.w $t3, $a1, 8 ++; LA32-NEXT: mulh.wu $a0, $t2, $t3 ++; LA32-NEXT: mul.w $t5, $t4, $t3 ++; LA32-NEXT: add.w $a0, $t5, $a0 ++; LA32-NEXT: sltu $t5, $a0, $t5 ++; LA32-NEXT: mulh.wu $t6, $t4, $t3 ++; LA32-NEXT: add.w $t5, $t6, $t5 ++; LA32-NEXT: ld.w $t7, $a1, 12 ++; LA32-NEXT: mul.w $a1, $t2, $t7 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a1, $a0, $a1 ++; LA32-NEXT: mulh.wu $t6, $t2, $t7 ++; LA32-NEXT: add.w $t6, $t6, $a1 ++; LA32-NEXT: srai.w $s7, $t4, 31 ++; LA32-NEXT: mul.w $a1, $s7, $t7 ++; LA32-NEXT: mulh.wu $t8, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a1 ++; LA32-NEXT: mulh.wu $fp, $a6, $s7 ++; LA32-NEXT: mul.w $s6, $t0, $s7 ++; LA32-NEXT: add.w $s8, $s6, $fp ++; LA32-NEXT: mul.w $a1, $a6, $s7 ++; LA32-NEXT: add.w $ra, $a1, $s8 ++; LA32-NEXT: sltu $s0, $ra, $a1 ++; LA32-NEXT: add.w $a0, $fp, $s0 ++; LA32-NEXT: add.w $a3, $a4, $t1 ++; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; 
LA32-NEXT: mulh.wu $t1, $a5, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill ++; LA32-NEXT: srai.w $s4, $t7, 31 ++; LA32-NEXT: mul.w $fp, $a7, $s4 ++; LA32-NEXT: mulh.wu $a4, $a7, $s4 ++; LA32-NEXT: add.w $s1, $a4, $fp ++; LA32-NEXT: sltu $s0, $s1, $fp ++; LA32-NEXT: add.w $s5, $a4, $s0 ++; LA32-NEXT: mul.w $a4, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a4 ++; LA32-NEXT: add.w $s0, $ra, $t8 ++; LA32-NEXT: add.w $a3, $a1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a1 ++; LA32-NEXT: add.w $a3, $s0, $a4 ++; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill ++; LA32-NEXT: add.w $s3, $t5, $t6 ++; LA32-NEXT: sltu $a4, $s3, $t5 ++; LA32-NEXT: mulh.wu $t5, $t4, $t7 ++; LA32-NEXT: add.w $a3, $t5, $a4 ++; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill ++; LA32-NEXT: mul.w $a4, $a7, $a6 ++; LA32-NEXT: st.w $a4, $a2, 0 ++; LA32-NEXT: sltu $a4, $s8, $s6 ++; LA32-NEXT: mulh.wu $t5, $t0, $s7 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: add.w $t1, $a4, $a0 ++; LA32-NEXT: sltu $a4, $t1, $a4 ++; LA32-NEXT: add.w $s2, $t5, $a4 ++; LA32-NEXT: mulh.wu $a4, $a7, $t3 ++; LA32-NEXT: mul.w $t5, $a5, $t3 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: sltu $t5, $a4, $t5 ++; LA32-NEXT: mulh.wu $t6, $a5, $t3 ++; LA32-NEXT: add.w $a3, $t6, $t5 ++; LA32-NEXT: mul.w $t6, $a7, $t7 ++; LA32-NEXT: add.w $t5, $t6, $a4 ++; LA32-NEXT: sltu $a4, $t5, $t6 ++; LA32-NEXT: mulh.wu $t6, $a7, $t7 ++; LA32-NEXT: add.w $a4, $t6, $a4 ++; LA32-NEXT: mulh.wu $t6, $t2, $a6 ++; LA32-NEXT: mul.w $s7, $t4, $a6 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t4, $a6 ++; LA32-NEXT: add.w $a0, $s8, $s7 ++; LA32-NEXT: mul.w $s7, $t2, $t0 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t2, $t0 ++; LA32-NEXT: add.w $a2, $s8, $s7 ++; LA32-NEXT: mul.w $s8, $a5, $s4 ++; LA32-NEXT: add.w $s7, $s1, $s8 ++; LA32-NEXT: add.w $s1, $s7, $ra ++; LA32-NEXT: add.w $a1, $fp, $a1 ++; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill ++; LA32-NEXT: sltu $ra, $a1, $fp ++; LA32-NEXT: add.w $a1, $s1, $ra ++; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill ++; LA32-NEXT: xor $s0, $a1, $s7 ++; LA32-NEXT: sltui $s0, $s0, 1 ++; LA32-NEXT: sltu $a1, $a1, $s7 ++; LA32-NEXT: masknez $s1, $a1, $s0 ++; LA32-NEXT: maskeqz $s0, $ra, $s0 ++; LA32-NEXT: add.w $t1, $s6, $t1 ++; LA32-NEXT: sltu $s6, $t1, $s6 ++; LA32-NEXT: add.w $s2, $s2, $s6 ++; LA32-NEXT: add.w $a2, $a0, $a2 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: mulh.wu $s6, $t4, $t0 ++; LA32-NEXT: add.w $t8, $s6, $a0 ++; LA32-NEXT: add.w $a4, $a3, $a4 ++; LA32-NEXT: sltu $a3, $a4, $a3 ++; LA32-NEXT: mulh.wu $s6, $a5, $t7 ++; LA32-NEXT: add.w $a3, $s6, $a3 ++; LA32-NEXT: mul.w $s6, $t4, $t7 ++; LA32-NEXT: mul.w $t7, $a5, $t7 ++; LA32-NEXT: mul.w $ra, $t4, $t0 ++; LA32-NEXT: mul.w $t0, $a5, $t0 ++; LA32-NEXT: mul.w $t4, $t4, $s4 ++; LA32-NEXT: mul.w $a7, $a7, $t3 ++; LA32-NEXT: mul.w $a6, $t2, $a6 ++; LA32-NEXT: mul.w $t3, $t2, $t3 ++; LA32-NEXT: mul.w $a0, $t2, $s4 ++; LA32-NEXT: mulh.wu $t2, $t2, $s4 ++; LA32-NEXT: mulh.wu $a5, $s4, $a5 ++; LA32-NEXT: sltu $s4, $s7, $s8 ++; LA32-NEXT: add.w $s4, $a5, $s4 ++; LA32-NEXT: add.w $s4, $s5, $s4 ++; LA32-NEXT: sltu $s5, $s4, $s5 ++; LA32-NEXT: add.w $s5, $a5, $s5 ++; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a1, $t0, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t0 ++; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload ++; LA32-NEXT: add.w $t0, 
$t0, $a5 ++; LA32-NEXT: or $s0, $s0, $s1 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: sltu $a5, $a4, $t7 ++; LA32-NEXT: add.w $t7, $a3, $a5 ++; LA32-NEXT: add.w $s1, $ra, $a2 ++; LA32-NEXT: sltu $a2, $s1, $ra ++; LA32-NEXT: add.w $t8, $t8, $a2 ++; LA32-NEXT: add.w $a5, $s6, $s3 ++; LA32-NEXT: sltu $a2, $a5, $s6 ++; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a2, $a3, $a2 ++; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload ++; LA32-NEXT: st.w $a3, $s6, 4 ++; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a3, $s2, $a3 ++; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload ++; LA32-NEXT: add.w $s2, $t1, $s2 ++; LA32-NEXT: sltu $t1, $s2, $t1 ++; LA32-NEXT: add.w $a3, $a3, $t1 ++; LA32-NEXT: add.w $t1, $s8, $s4 ++; LA32-NEXT: sltu $s3, $t1, $s8 ++; LA32-NEXT: add.w $s3, $s5, $s3 ++; LA32-NEXT: add.w $t2, $t2, $a0 ++; LA32-NEXT: add.w $t2, $t2, $t4 ++; LA32-NEXT: add.w $t2, $t2, $s7 ++; LA32-NEXT: add.w $t4, $a0, $fp ++; LA32-NEXT: sltu $a0, $t4, $a0 ++; LA32-NEXT: add.w $a0, $t2, $a0 ++; LA32-NEXT: add.w $a0, $s3, $a0 ++; LA32-NEXT: add.w $t2, $t1, $t4 ++; LA32-NEXT: sltu $t1, $t2, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $t1, $t2, $s2 ++; LA32-NEXT: sltu $a3, $t1, $t2 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $a3, $t6, $t0 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $t0, $a3, $a6 ++; LA32-NEXT: add.w $a1, $a7, $a1 ++; LA32-NEXT: sltu $a7, $a1, $a7 ++; LA32-NEXT: add.w $a3, $t5, $t0 ++; LA32-NEXT: add.w $a3, $a3, $a7 ++; LA32-NEXT: sltu $t2, $a3, $t5 ++; LA32-NEXT: xor $t4, $a3, $t5 ++; LA32-NEXT: sltui $t4, $t4, 1 ++; LA32-NEXT: masknez $t2, $t2, $t4 ++; LA32-NEXT: maskeqz $a7, $a7, $t4 ++; LA32-NEXT: st.w $a1, $s6, 8 ++; LA32-NEXT: or $a1, $a7, $t2 ++; LA32-NEXT: sltu $a7, $t0, $t6 ++; LA32-NEXT: xor $t0, $t0, $t6 ++; LA32-NEXT: sltui $t0, $t0, 1 ++; LA32-NEXT: masknez $a7, $a7, $t0 ++; LA32-NEXT: maskeqz $a6, $a6, $t0 ++; LA32-NEXT: or $a6, $a6, $a7 ++; LA32-NEXT: add.w $a6, $s1, $a6 ++; LA32-NEXT: sltu $a7, $a6, $s1 ++; LA32-NEXT: add.w $a7, $t8, $a7 ++; LA32-NEXT: add.w $a1, $a4, $a1 ++; LA32-NEXT: sltu $a4, $a1, $a4 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: add.w $t0, $t1, $s0 ++; LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: st.w $a3, $s6, 12 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $a4, $a7, $a4 ++; LA32-NEXT: add.w $a4, $a4, $a6 ++; LA32-NEXT: sltu $t1, $a4, $a7 ++; LA32-NEXT: xor $a7, $a4, $a7 ++; LA32-NEXT: sltui $a7, $a7, 1 ++; LA32-NEXT: masknez $t1, $t1, $a7 ++; LA32-NEXT: maskeqz $a6, $a6, $a7 ++; LA32-NEXT: or $a6, $a6, $t1 ++; LA32-NEXT: add.w $a6, $a5, $a6 ++; LA32-NEXT: sltu $a5, $a6, $a5 ++; LA32-NEXT: add.w $a2, $a2, $a5 ++; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a4, $t1, $a4 ++; LA32-NEXT: add.w $a1, $t3, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t3 ++; LA32-NEXT: add.w $a4, $a4, $a5 ++; LA32-NEXT: sltu $a7, $a4, $t1 ++; LA32-NEXT: xor $t1, $a4, $t1 ++; LA32-NEXT: sltui $t1, $t1, 1 ++; LA32-NEXT: masknez $a7, $a7, $t1 ++; LA32-NEXT: maskeqz $a5, $a5, $t1 ++; LA32-NEXT: or $a5, $a5, $a7 ++; LA32-NEXT: add.w $a5, $a6, $a5 ++; LA32-NEXT: sltu $a6, $a5, $a6 ++; LA32-NEXT: add.w $a2, $a2, $a6 ++; LA32-NEXT: add.w $a0, $a2, $a0 ++; LA32-NEXT: add.w $a2, $a5, $t0 ++; LA32-NEXT: sltu $a5, $a2, $a5 ++; LA32-NEXT: add.w $a0, $a0, $a5 
++; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a5, $a1, $a5 ++; LA32-NEXT: sltu $a1, $a5, $a1 ++; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a6, $a4, $a6 ++; LA32-NEXT: add.w $a6, $a6, $a1 ++; LA32-NEXT: sltu $a7, $a6, $a4 ++; LA32-NEXT: xor $a4, $a6, $a4 ++; LA32-NEXT: sltui $a4, $a4, 1 ++; LA32-NEXT: masknez $a7, $a7, $a4 ++; LA32-NEXT: maskeqz $a1, $a1, $a4 ++; LA32-NEXT: or $a1, $a1, $a7 ++; LA32-NEXT: add.w $a1, $a2, $a1 ++; LA32-NEXT: sltu $a2, $a1, $a2 ++; LA32-NEXT: add.w $a0, $a0, $a2 ++; LA32-NEXT: srai.w $a2, $a3, 31 ++; LA32-NEXT: xor $a3, $a6, $a2 ++; LA32-NEXT: xor $a0, $a0, $a2 ++; LA32-NEXT: or $a0, $a3, $a0 ++; LA32-NEXT: xor $a3, $a5, $a2 ++; LA32-NEXT: xor $a1, $a1, $a2 ++; LA32-NEXT: or $a1, $a3, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 96 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi128: + ; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -32 +-; LA64-NEXT: .cfi_def_cfa_offset 32 +-; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +-; LA64-NEXT: .cfi_offset 1, -8 +-; LA64-NEXT: .cfi_offset 22, -16 +-; LA64-NEXT: move $fp, $a4 +-; LA64-NEXT: st.d $zero, $sp, 8 +-; LA64-NEXT: addi.d $a4, $sp, 8 +-; LA64-NEXT: bl %plt(__muloti4) +-; LA64-NEXT: st.d $a1, $fp, 8 +-; LA64-NEXT: st.d $a0, $fp, 0 +-; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: srai.d $a5, $a1, 63 ++; LA64-NEXT: mul.d $a6, $a2, $a5 ++; LA64-NEXT: mulh.du $a7, $a2, $a5 ++; LA64-NEXT: add.d $a7, $a7, $a6 ++; LA64-NEXT: mul.d $a5, $a3, $a5 ++; LA64-NEXT: add.d $a5, $a7, $a5 ++; LA64-NEXT: srai.d $a7, $a3, 63 ++; LA64-NEXT: mul.d $t0, $a7, $a1 ++; LA64-NEXT: mulh.du $t1, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: mul.d $a7, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t0, $a7 ++; LA64-NEXT: add.d $a5, $t0, $a5 ++; LA64-NEXT: mulh.du $t0, $a0, $a2 ++; LA64-NEXT: mul.d $t1, $a1, $a2 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: sltu $t1, $t0, $t1 ++; LA64-NEXT: mulh.du $t2, $a1, $a2 ++; LA64-NEXT: add.d $t1, $t2, $t1 ++; LA64-NEXT: mul.d $t2, $a0, $a3 ++; LA64-NEXT: add.d $t0, $t2, $t0 ++; LA64-NEXT: sltu $t2, $t0, $t2 ++; LA64-NEXT: mulh.du $t3, $a0, $a3 ++; LA64-NEXT: add.d $t2, $t3, $t2 ++; LA64-NEXT: add.d $a6, $a7, $a6 ++; LA64-NEXT: sltu $a7, $a6, $a7 ++; LA64-NEXT: add.d $a5, $a5, $a7 ++; LA64-NEXT: mul.d $a0, $a0, $a2 ++; LA64-NEXT: mul.d $a2, $a1, $a3 ++; LA64-NEXT: mulh.du $a1, $a1, $a3 ++; LA64-NEXT: add.d $a3, $t1, $t2 ++; LA64-NEXT: sltu $a7, $a3, $t1 ++; LA64-NEXT: add.d $a1, $a1, $a7 ++; LA64-NEXT: st.d $a0, $a4, 0 ++; LA64-NEXT: add.d $a0, $a2, $a3 ++; LA64-NEXT: sltu $a2, $a0, $a2 ++; LA64-NEXT: add.d $a1, $a1, $a2 ++; LA64-NEXT: st.d $t0, $a4, 8 ++; LA64-NEXT: 
add.d $a1, $a1, $a5 ++; LA64-NEXT: add.d $a2, $a0, $a6 ++; LA64-NEXT: sltu $a0, $a2, $a0 ++; LA64-NEXT: add.d $a0, $a1, $a0 ++; LA64-NEXT: srai.d $a1, $t0, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: xor $a1, $a2, $a1 ++; LA64-NEXT: or $a0, $a1, $a0 + ; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +-; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 32 + ; LA64-NEXT: ret + %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) + %val = extractvalue {i128, i1} %t, 0 +-- +2.20.1 + + +From 01ced6193e2abfbd50fbd9d40066cf27f9f9067b Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 29 Nov 2023 15:21:21 +0800 +Subject: [PATCH 12/14] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions + (#73742) + +``` +when a=c=-0.0, b=0.0: +-(a * b + (-c)) = -0.0 +-a * b + c = 0.0 +(fneg (fma a, b (-c))) != (fma (fneg a), b ,c) +``` + +See https://reviews.llvm.org/D90901 for a similar discussion on X86. + +(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9) +--- + .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +- + .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +- + llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++-- + llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++-- + 4 files changed, 483 insertions(+), 49 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +index 826db54febd3..65120c083f49 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), + def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), + (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +-// fnmsub.s: -fj * fk + fa +-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), ++// fnmsub.s: -(fj * fk - fa) ++def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), ++ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; ++ ++// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), + (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + } // Predicates = [HasBasicF] + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +index 5118474725b6..437c1e4d7be2 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), + (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + + // fnmsub.d: -(fj * fk - fa) +-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), ++def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), ++ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; ++ ++// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), + (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + } // Predicates = [HasBasicD] + +diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll +index 6dd628479433..58d20c62a668 100644 +--- a/llvm/test/CodeGen/LoongArch/double-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/double-fma.ll +@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_d: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: 
fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: +@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_d: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %mul = fmul double %a, %b ++ %add = fadd double %mul, %negc ++ %neg = fneg double %add ++ ret double %neg ++} ++ ++define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz double %a ++ %mul = fmul nsz double %nega, %b ++ %add = fadd nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a +@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract double %c ++ %mul = fmul contract double %a, %b ++ %add = fadd contract double %mul, %negc ++ %neg = fneg contract double %add ++ ret double %neg ++} ++ ++define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz double %a ++ %mul = fmul contract nsz double %nega, %b ++ %add = fadd contract nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract double %a + %mul = fmul contract double %nega, %b +@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call double @llvm.fma.f64(double %a, double %b, double %c) +- %neg = fneg double %fma +- ret double %neg ++ %negfma = fneg double %fma ++ ret double %negfma + } + + define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) ++ %negfma = fneg double %fma ++ ret double %negfma ++} ++ ++define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: 
ret + %nega = fneg double %a +- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) ++ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +-define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++;; Check that fnmsub.d is not emitted. ++define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg double %b +- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) ++ %nega = fneg double %a ++ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b +- %sub = fsub contract double %c, %mul +- ret double %sub ++ %negc = fneg contract double %c ++ %add = fadd contract double %negc, %mul ++ %negadd = fneg contract double %add ++ ret double %negadd + } +diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll +index 54dc56784006..c236255d971a 100644 +--- a/llvm/test/CodeGen/LoongArch/float-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/float-fma.ll +@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_s: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 
+-; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: +@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_s: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %mul = fmul float %a, %b ++ %add = fadd float %mul, %negc ++ %neg = fneg float %add ++ ret float %neg ++} ++ ++define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz float %a ++ %mul = fmul nsz float %nega, %b ++ %add = fadd nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract float %c ++ %mul = fmul contract float %a, %b ++ %add = fadd contract float %mul, %negc ++ %neg = fneg contract float %add ++ ret float %neg ++} ++ ++define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz float %a ++ %mul = fmul contract nsz float %nega, %b ++ %add = fadd contract nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract float %a + %mul = fmul contract float %nega, %b +@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call float @llvm.fma.f64(float %a, float %b, float %c) +- %neg = fneg float %fma +- ret float %neg ++ %negfma = fneg float %fma ++ ret float %negfma + } + + define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) ++ %negfma = fneg float %fma ++ ret float %negfma ++} ++ ++define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +- %fma = 
call float @llvm.fma.f64(float %nega, float %b, float %c) ++ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +-define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++;; Check that fnmsub.s is not emitted. ++define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg float %b +- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) ++ %nega = fneg float %a ++ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b +- %sub = fsub contract float %c, %mul +- ret float %sub ++ %negc = fneg contract float %c ++ %add = fadd contract float %negc, %mul ++ %negadd = fneg contract float %add ++ ret float %negadd + } +-- +2.20.1 + + +From 7a3bd125d9c1d0265b265ce238a88d0d4550e5a0 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Wed, 3 Jan 2024 13:59:12 +0800 +Subject: [PATCH 13/14] [LoongArch] Fix the procossor series mask + +Refer PRID_SERIES_MASK definition in linux kernel: +arch/loongarch/include/asm/cpu.h. 
+ +(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0) +--- + llvm/lib/TargetParser/Host.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 81309280a44b..d11dc605e188 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { + // Use processor id to detect cpu name. + uint32_t processor_id; + __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); +- switch (processor_id & 0xff00) { ++ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. ++ switch (processor_id & 0xf000) { + case 0xc000: // Loongson 64bit, 4-issue + return "la464"; + // TODO: Others. +-- +2.20.1 + + +From 3634ac4cbc475509c46521f5b8a3fcbeca6d06c7 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 11 Mar 2024 08:59:17 +0800 +Subject: [PATCH 14/14] [LoongArch] Make sure that the LoongArchISD::BSTRINS + node uses the correct `MSB` value (#84454) + +The `MSB` must not be greater than `GRLen`. Without this patch, newly +added test cases will crash with LoongArch32, resulting in a 'cannot +select' error. + +(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b) +(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626) +--- + llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++- + llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index ed106cb766bc..5affaf37ad5a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -2310,7 +2310,9 @@ Retry: + return DAG.getNode( + LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), ++ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) ++ : (MaskIdx0 + MaskLen0 - 1), ++ DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + +diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +index dfbe000841cd..e008caacad2a 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { + ret i32 %or + } + ++;; The high bits of `const` are zero. ++define i32 @pat5_high_zeros(i32 %a) nounwind { ++; CHECK-LABEL: pat5_high_zeros: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $a1, 1 ++; CHECK-NEXT: ori $a1, $a1, 564 ++; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 ++; CHECK-NEXT: ret ++ %and = and i32 %a, 65535 ; 0x0000ffff ++ %or = or i32 %and, 305397760 ; 0x12340000 ++ ret i32 %or ++} ++ + ;; Pattern 6: a = b | ((c & mask) << shamt) + ;; In this testcase b is 0x10000002, but in fact we do not require b being a + ;; constant. As long as all positions in b to be overwritten by the incoming +-- +2.20.1 + -- cgit v1.2.3
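For reference, the signed-zero corner case that motivates the FNMSUB_{S/D} pattern change in PATCH 12/14 above can be reproduced with a small standalone program. This is an illustrative sketch only, not part of the backported patch, and it assumes default (non-fast-math) IEEE 754 floating-point semantics; all names in it are hypothetical.

// Illustrative only: shows why -(a * b + (-c)) and (-a) * b + c are not
// interchangeable when a = c = -0.0 and b = 0.0, i.e. why
// (fneg (fma a, b, (fneg c))) cannot be matched as (fma (fneg a), b, c)
// without the nsz flag.
#include <cmath>
#include <cstdio>

int main() {
  // volatile keeps the floating-point operations from being folded away.
  volatile double a = -0.0, b = 0.0, c = -0.0;
  double lhs = -(a * b + (-c)); // corresponds to fneg(fma(a, b, fneg(c)))
  double rhs = (-a) * b + c;    // corresponds to fma(fneg(a), b, c)
  std::printf("lhs = %g (signbit %d), rhs = %g (signbit %d)\n",
              lhs, (int)std::signbit(lhs), rhs, (int)std::signbit(rhs));
  // Expected: lhs = -0 (signbit 1), rhs = 0 (signbit 0)
  return 0;
}

With strict signed-zero handling the two results differ only in the sign of zero, which is exactly why the patch restricts the -fj * fk + fa form of FNMSUB_{S/D} to FMAs carrying the nsz flag and adds the -(fj * fk - fa) pattern for the general case.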