diff options
Diffstat (limited to '0002-add-riscv-support-not-upstream-modified-files.patch')
-rw-r--r-- | 0002-add-riscv-support-not-upstream-modified-files.patch | 813 |
1 files changed, 813 insertions, 0 deletions
diff --git a/0002-add-riscv-support-not-upstream-modified-files.patch b/0002-add-riscv-support-not-upstream-modified-files.patch new file mode 100644 index 0000000..05e842e --- /dev/null +++ b/0002-add-riscv-support-not-upstream-modified-files.patch @@ -0,0 +1,813 @@ +From: gns <infiwang@proton.me> +From: Jingwiw <wangjingwei@iscas.ac.cn> +Subject: [PATCH] riscv64: add initial support for riscv64 + +Add RISC-V 64 (rv64g) platform support with lp64d ABI to LuaJIT (not upstreamed yet). +Part1: modified files + +diff --git a/bundle/LuaJIT-2.1-20220411/Makefile b/bundle/LuaJIT-2.1-20220411/Makefile +index 6fb8efa..c81a448 100644 +--- a/bundle/LuaJIT-2.1-20220411/Makefile ++++ b/bundle/LuaJIT-2.1-20220411/Makefile +@@ -86,6 +86,7 @@ FILE_MAN= luajit.1 + FILE_PC= luajit.pc + FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h + FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ ++ dis_riscv.lua dis_riscv64.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua vmdef.lua +diff --git a/bundle/LuaJIT-2.1-20220411/src/Makefile b/bundle/LuaJIT-2.1-20220411/src/Makefile +index 68a9a7c..e1b7464 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/Makefile ++++ b/bundle/LuaJIT-2.1-20220411/src/Makefile +@@ -53,6 +53,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_riscv64= + # + #CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -270,6 +271,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) + else + TARGET_LJARCH= mips + endif ++else ++ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) ++ TARGET_LJARCH= riscv32 ++else ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ TARGET_LJARCH= riscv64 + else + $(error Unsupported target architecture) + endif +@@ -279,6 +286,8 @@ endif + endif + endif + endif ++endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +@@ -454,6 +463,12 @@ else + ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D MIPSR6 + endif ++ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D RISCV32 ++endif ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D RISCV64 ++endif + ifeq (ppc,$(TARGET_LJARCH)) + ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SQRT +diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c +index 4efda1b..def4a92 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c ++++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c +@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_mips.h" + #elif LJ_TARGET_S390X + #include "../dynasm/dasm_s390x.h" ++#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 ++#include "../dynasm/dasm_riscv.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c +index e73f9b1..0a187d5 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c ++++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c +@@ -208,6 +208,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); ++#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 ++ if ((ins & 0x7f) == 0x6fu) { ++ fprintf(ctx->fp, "\tjal %s\n", sym); ++ } else { ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +@@ -327,6 +336,9 @@ void emit_asm(BuildCtx *ctx) + #if LJ_TARGET_MIPS + fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); + #endif ++#if LJ_TARGET_RISCV64 ++ fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); ++#endif + + for (i = rel = 0; i < ctx->nsym; i++) { + int32_t ofs = ctx->sym[i].ofs; +diff --git a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua +index 7cb23f1..c5e22f6 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua ++++ b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua +@@ -85,6 +85,7 @@ local map_arch = { + mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, ++ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, + } + + local map_os = { +diff --git a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c +index 50c2b13..11748a4 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c +@@ -714,6 +714,75 @@ JIT_PARAMDEF(JIT_PARAMINIT) + #include <sys/utsname.h> + #endif + ++#if LJ_TARGET_RISCV64 ++#include <setjmp.h> ++#include <signal.h> ++static sigjmp_buf sigbuf = {0}; ++static void detect_sigill(int sig) ++{ ++ siglongjmp(sigbuf, 1); ++} ++ ++static int riscv_compressed() ++{ ++#if defined(__riscv_compressed) ++ // Don't bother checking for RVC -- would crash before getting here. ++ return 1; ++#elif defined(__GNUC__) ++ // c.nop; c.nop; ++ __asm__(".4byte 0x00010001"); ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zba() ++{ ++#if defined(__GNUC__) ++ // Don't bother verifying the result, just check if the instruction exists. ++ // add.uw zero, zero, zero ++ __asm__(".4byte 0x0800003b"); ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zbb() ++{ ++#if defined(__GNUC__) ++ register int t asm ("a0"); ++ // addi a0, zero, 255; sext.b a0, a0; ++ __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); ++ return t < 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_xthead() ++{ ++#if defined(__GNUC__) ++ register int t asm ("a0"); ++ // C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". ++ // Therefore assume XThead* are present if XTheadBb is present. ++ // addi a0, zero, 255; th.ext a0, a0, 7, 0; ++ __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); ++ return t == -1; // In case of collision with other vendor extensions. ++#else ++ return 0; ++#endif ++} ++ ++static uint32_t riscv_probe(int (*func)(void), uint32_t flag) ++{ ++ if (sigsetjmp(sigbuf, 1) == 0) { ++ return func() ? flag : 0; ++ } else return 0; ++} ++#endif ++ + /* Arch-dependent CPU feature detection. */ + static uint32_t jit_cpudetect(void) + { +@@ -786,6 +855,21 @@ static uint32_t jit_cpudetect(void) + #endif + #elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ ++#elif LJ_TARGET_RISCV64 ++#if LJ_HASJIT ++ // SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. ++ struct sigaction old = {0}, act = {0}; ++ act.sa_handler = detect_sigill; ++ sigaction(SIGILL, &act, &old); ++ flags |= riscv_probe(riscv_compressed, JIT_F_RVC); ++ flags |= riscv_probe(riscv_zba, JIT_F_RVZba); ++ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); ++ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); ++ sigaction(SIGILL, &old, NULL); ++ ++ // Detect V/P? ++ // V have no hardware available, P not ratified yet. ++#endif + #else + #error "Missing CPU detection for this architecture" + #endif +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h +index 9f7047d..3490af4 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h +@@ -33,6 +33,10 @@ + #define LUAJIT_ARCH_mips64 7 + #define LUAJIT_ARCH_S390X 8 + #define LUAJIT_ARCH_s390x 8 ++#define LUAJIT_ARCH_RISCV32 9 ++#define LUAJIT_ARCH_riscv32 9 ++#define LUAJIT_ARCH_RISCV64 10 ++#define LUAJIT_ARCH_riscv64 10 + + /* Target OS. */ + #define LUAJIT_OS_OTHER 0 +@@ -69,6 +73,10 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif defined(__riscv) && __riscv_xlen == 32 ++#define LUAJIT_TARGET LUAJIT_ARCH_RISCV32 ++#elif defined(__riscv) && __riscv_xlen == 64 ++#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 + #else + #error "No support for this architecture (yet)" + #endif +@@ -451,6 +459,32 @@ + #define LJ_TARGET_GC64 1 + #define LJ_ARCH_NOJIT 1 /* NYI */ + ++#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV32 ++#error "No support for RISC-V 32" ++ ++#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 ++#if defined(__riscv_float_abi_double) ++ ++#define LJ_ARCH_NAME "riscv64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ ++#define LJ_TARGET_RISCV64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 0 // TODO ++#define LJ_TARGET_EHRAREG 1 ++#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ ++ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */ ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++// for now ++#define LUAJIT_NO_UNWIND 1 ++ ++#else ++#error "No support for RISC-V 64 Soft-float/Single-float" ++#endif ++ + #else + #error "No target architecture defined" + #endif +@@ -525,6 +559,13 @@ + /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ + #error "Only n64 ABI supported for MIPS64" + #endif ++#elif LJ_TARGET_RISCV ++#if !defined(__riscv_float_abi_double) ++#error "Only RISC-V 64 double float supported for now" ++#endif ++#if defined(__riscv_compressed) ++#error "Compressed instructions not supported for now" ++#endif + #endif + #endif + +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c +index d811808..4deca17 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c +@@ -185,6 +185,8 @@ IRFLDEF(FLOFS) + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_emit_riscv.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1664,6 +1666,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_mips.h" + #elif LJ_TARGET_S390X + #include "lj_asm_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_asm_riscv64.h" + #else + #error "Missing assembler for target CPU" + #endif +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c +index 8162b95..96bc3b8 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c +@@ -686,6 +686,97 @@ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_RISCV64 ++/* -- RISC-V lp64d calling conventions ------------------------------------ */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size > 16 by reference. */ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ unsigned int cl = ccall_classify_struct(cts, ctr); \ ++ if ((cl & 4) && (cl >> 8) <= 2) { \ ++ CTSize i = (cl >> 8) - 1; \ ++ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ ++ } else { \ ++ if (cl > 1) { \ ++ sp = (uint8_t *)&cc->fpr[0]; \ ++ if ((cl >> 8) > 2) \ ++ sp = (uint8_t *)&cc->gpr[0]; \ ++ } \ ++ memcpy(dp, sp, ctr->size); \ ++ } \ ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass long double complex by reference. */ \ ++ if (sz == 2*sizeof(long double)) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass complex in two FPRs or on stack. */ \ ++ else if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ sz = 2*CTSIZE_PTR; \ ++ } else { \ ++ isfp = 1; \ ++ sz = 2*CTSIZE_PTR; \ ++ } ++ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass structs of size >16 by reference. */ \ ++ unsigned int cl = ccall_classify_struct(cts, d); \ ++ nff = cl >> 8; \ ++ if (sz > 16) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass struct in FPRs. */ \ ++ if (cl > 1) { \ ++ isfp = (cl & 4) ? 2 : 1; \ ++ } ++ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ ++ int n2 = ctype_isvector(d->info) ? 1 : \ ++ isfp == 1 ? n : 2; \ ++ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } else { \ ++ if (ngpr + n2 <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n2; \ ++ goto done; \ ++ } \ ++ } \ ++ } else { /* Try to pass argument in GPRs. */ \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -1045,6 +1136,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, + + #endif + ++/* -- RISC-V ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_RISCV64 ++ ++static unsigned int ccall_classify_struct(CTState *cts, CType *ct) ++{ ++ CTSize sz = ct->size; ++ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); ++ while (ct->sib) { ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isfp(sct->info)) { ++ r |= sct->size; ++ if (!isu) n++; else if (n == 0) n = 1; ++ } else if (ctype_iscomplex(sct->info)) { ++ r |= (sct->size >> 1); ++ if (!isu) n += 2; else if (n < 2) n = 2; ++ } else if (ctype_isstruct(sct->info)) { ++ goto substruct; ++ } else { ++ goto noth; ++ } ++ } else if (ctype_isbitfield(ct->info)) { ++ goto noth; ++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ sct = ctype_rawchild(cts, ct); ++ substruct: ++ if (sct->size > 0) { ++ unsigned int s = ccall_classify_struct(cts, sct); ++ if (s <= 1) goto noth; ++ r |= (s & 255); ++ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); ++ } ++ } ++ } ++ if ((r == 4 || r == 8) && n <= 4) ++ return r + (n << 8); ++noth: /* Not a homogeneous float/double aggregate. */ ++ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ ++} ++ ++#endif ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. */ +@@ -1091,6 +1227,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + #endif + #endif + ++#if LJ_TARGET_RISCV64 ++ int nff = 0; ++#endif ++ + /* Clear unused regs to get some determinism in case of misdeclaration. */ + memset(cc->gpr, 0, sizeof(cc->gpr)); + #if CCALL_NUM_FPR +@@ -1265,7 +1405,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_RISCV64 ++ if (isfp && d->size == sizeof(float)) ++ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ ++#endif ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +@@ -1305,13 +1449,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + CTSize i = (sz >> 2) - 1; + do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); + } ++#elif LJ_TARGET_RISCV64 ++ if (isfp == 2 && nff <= 2) { ++ /* Split complex float into separate registers. */ ++ CTSize i = (sz >> 2) - 1; ++ do { ++ ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; ++ } while (i--); ++ } + #else + UNUSED(isfp); + #endif + } + if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ + +-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) ++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 + cc->nfpr = nfpr; /* Required for vararg functions. */ + #endif + cc->nsp = nsp; +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h +index 5245553..29985ff 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h +@@ -154,6 +154,21 @@ typedef union FPRArg { + float f; + } FPRArg; + ++#elif LJ_TARGET_RISCV64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR 2 ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -196,7 +211,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ + #elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ +-#elif LJ_TARGET_PPC ++#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 + uint8_t nfpr; /* Number of arguments in FPRs. */ + #endif + #if LJ_32 +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c +index c1e67ab..0c3d80c 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c +@@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_MCODE_HEAD 68 ++ + #else + + /* Missing support for this architecture. */ +@@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) + } + return p; + } ++#elif LJ_TARGET_RISCV64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; ++ uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; ++ MSize slot; ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); ++ *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); ++ *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. */ + #define callback_mcode_init(g, p) (p) +@@ -567,6 +604,31 @@ void lj_ccallback_mcode_free(CTState *cts) + } + #endif + ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } else if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; +@@ -735,7 +797,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c +index ded382a..32688a0 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c +@@ -56,6 +56,15 @@ static const ASMFunction dispatch_got[] = { + #undef GOTFUNC + #endif + ++#if LJ_TARGET_RISCV64 ++#include <math.h> ++#define GOTFUNC(name) (ASMFunction)name, ++static const ASMFunction dispatch_got[] = { ++ GOTDEF(GOTFUNC) ++}; ++#undef GOTFUNC ++#endif ++ + /* Initialize instruction dispatch table and hot counters. */ + void lj_dispatch_init(GG_State *GG) + { +@@ -76,7 +85,7 @@ void lj_dispatch_init(GG_State *GG) + GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); + for (i = 0; i < GG_NUM_ASMFF; i++) + GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); + #endif + } +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h +index 0594af5..37407f4 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h +@@ -66,6 +66,35 @@ GOTDEF(GOTENUM) + }; + #endif + ++#if LJ_TARGET_RISCV64 ++/* Need our own global offset table to wrap RISC-V PIC intern / extern calls */ ++ ++#if LJ_HASJIT ++#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) ++#else ++#define JITGOTDEF(_) ++#endif ++#if LJ_HASFFI ++#define FFIGOTDEF(_) \ ++ _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) ++#else ++#define FFIGOTDEF(_) ++#endif ++ ++#define GOTDEF(_) \ ++ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ ++ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ ++ _(pow) _(fmod) _(ldexp) \ ++ JITGOTDEF(_) FFIGOTDEF(_) ++ ++enum { ++#define GOTENUM(name) LJ_GOT_##name, ++GOTDEF(GOTENUM) ++#undef GOTENUM ++ LJ_GOT__MAX ++}; ++#endif ++ + /* Type of hot counter. Must match the code in the assembler VM. */ + /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ + typedef uint16_t HotCount; +@@ -93,7 +122,7 @@ typedef struct GG_State { + /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ + uint8_t align1[(16-sizeof(global_State))&15]; + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 + ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + #endif + #if LJ_HASJIT +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h +index 4058311..26b8081 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h +@@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + ** need to change to 3. + */ + #define CFRAME_SHIFT_MULTRES 0 ++#elif LJ_TARGET_RISCV64 ++#define CFRAME_OFS_ERRF 252 ++#define CFRAME_OFS_NRES 248 ++#define CFRAME_OFS_PREV 240 ++#define CFRAME_OFS_L 232 ++#define CFRAME_OFS_PC 224 ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SIZE 256 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h +index 74b40fd..23d027b 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h +@@ -67,6 +67,15 @@ + #endif + #endif + ++#elif LJ_TARGET_RISCV64 ++ ++#define JIT_F_RVC (JIT_F_CPU << 0) ++#define JIT_F_RVZba (JIT_F_CPU << 1) ++#define JIT_F_RVZbb (JIT_F_CPU << 2) ++#define JIT_F_RVXThead (JIT_F_CPU << 3) ++ ++#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006XThead" ++ + #else + + #define JIT_F_CPUSTRING "" +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_target.h b/bundle/LuaJIT-2.1-20220411/src/lj_target.h +index 3831cb6..4534567 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_target.h ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. + */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 + typedef uint64_t RegSet; + #else + typedef uint32_t RegSet; +@@ -69,7 +69,7 @@ typedef uint32_t RegSet; + #define rset_set(rs, r) (rs |= RID2RSET(r)) + #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) + #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 + #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) + #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) + #else +@@ -146,6 +146,8 @@ typedef uint32_t RegCost; + #include "lj_target_mips.h" + #elif LJ_TARGET_S390X + #include "lj_target_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_target_riscv.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c +index b6cc60b..2186621 100644 +--- a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c ++++ b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c +@@ -58,7 +58,8 @@ double lj_vm_foldarith(double x, double y, int op) + + /* -- Helper functions for generated machine code ------------------------- */ + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ ++ || LJ_TARGET_RISCV64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; |