From: gns From: Jingwiw Subject: [PATCH] riscv64: add initial support for riscv64 Add RISC-V 64 (rv64g) platform support with lp64d ABI to LuaJIT (not upstreamed yet). Part1: modified files diff --git a/bundle/LuaJIT-2.1-20220411/Makefile b/bundle/LuaJIT-2.1-20220411/Makefile index 6fb8efa..c81a448 100644 --- a/bundle/LuaJIT-2.1-20220411/Makefile +++ b/bundle/LuaJIT-2.1-20220411/Makefile @@ -86,6 +86,7 @@ FILE_MAN= luajit.1 FILE_PC= luajit.pc FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_riscv.lua dis_riscv64.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua vmdef.lua diff --git a/bundle/LuaJIT-2.1-20220411/src/Makefile b/bundle/LuaJIT-2.1-20220411/src/Makefile index 68a9a7c..e1b7464 100644 --- a/bundle/LuaJIT-2.1-20220411/src/Makefile +++ b/bundle/LuaJIT-2.1-20220411/src/Makefile @@ -53,6 +53,7 @@ CCOPT_arm= CCOPT_arm64= CCOPT_ppc= CCOPT_mips= +CCOPT_riscv64= # #CCDEBUG= # Uncomment the next line to generate debug information: @@ -270,6 +271,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) else TARGET_LJARCH= mips endif +else +ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv32 +else +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv64 else $(error Unsupported target architecture) endif @@ -279,6 +286,8 @@ endif endif endif endif +endif +endif ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 @@ -454,6 +463,12 @@ else ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D MIPSR6 endif +ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D RISCV32 +endif +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D RISCV64 +endif ifeq (ppc,$(TARGET_LJARCH)) ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) 
DASM_AFLAGS+= -D SQRT diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c index 4efda1b..def4a92 100644 --- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c +++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c @@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #include "../dynasm/dasm_mips.h" #elif LJ_TARGET_S390X #include "../dynasm/dasm_s390x.h" +#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 +#include "../dynasm/dasm_riscv.h" #else #error "No support for this architecture (yet)" #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c index e73f9b1..0a187d5 100644 --- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c +++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c @@ -208,6 +208,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); exit(1); +#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 + if ((ins & 0x7f) == 0x6fu) { + fprintf(ctx->fp, "\tjal %s\n", sym); + } else { + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); + } #else #error "missing relocation support for this architecture" #endif @@ -327,6 +336,9 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_MIPS fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif +#if LJ_TARGET_RISCV64 + fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); +#endif for (i = rel = 0; i < ctx->nsym; i++) { int32_t ofs = ctx->sym[i].ofs; diff --git a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua index 7cb23f1..c5e22f6 100644 --- a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua +++ b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua @@ -85,6 +85,7 @@ local map_arch = { mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, mips64r6 = { e = "be", b = 64, 
m = 8, f = 0xa0000407, },
   mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
+  riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
 }
 
 local map_os = {
diff --git a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
index 50c2b13..11748a4 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
@@ -714,6 +714,75 @@ JIT_PARAMDEF(JIT_PARAMINIT)
 #include <sys/utsname.h>
 #endif
 
+#if LJ_TARGET_RISCV64
+#include <setjmp.h>
+#include <signal.h>
+static sigjmp_buf sigbuf = {0};  /* Resume point for a probe that faults. */
+static void detect_sigill(int sig)  /* Signal handler used while probing. */
+{
+  siglongjmp(sigbuf, 1);  /* Makes sigsetjmp() in riscv_probe() return 1. */
+}
+
+static int riscv_compressed(void)
+{
+#if defined(__riscv_compressed)
+  // Don't bother checking for RVC -- would crash before getting here.
+  return 1;
+#elif defined(__GNUC__)
+  // c.nop; c.nop; -- returns 1 only if executing them does not trap.
+  __asm__ volatile(".4byte 0x00010001");
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_zba(void)
+{
+#if defined(__GNUC__)
+  // Don't bother verifying the result, just check if the instruction exists.
+  // add.uw zero, zero, zero
+  __asm__ volatile(".4byte 0x0800003b");
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_zbb(void)
+{
+#if defined(__GNUC__)
+  register int t __asm__("a0");
+  // addi a0, zero, 255; sext.b a0, a0; -- a0 is handed back through t.
+  __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x60451513" : "=r"(t));
+  return t < 0;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_xthead(void)
+{
+#if defined(__GNUC__)
+  register int t __asm__("a0");
+  // C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc".
+  // Therefore assume XThead* are present if XTheadBb is present.
+  // addi a0, zero, 255; th.ext a0, a0, 7, 0; -- a0 is handed back through t.
+  __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x1c05250b" : "=r"(t));
+  return t == -1; // In case of collision with other vendor extensions.
+#else
+  return 0;
+#endif
+}
+
+static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
+{
+  if (sigsetjmp(sigbuf, 1) == 0) {  /* Direct return: run the probe. */
+    return func() ? flag : 0;
+  } else return 0;  /* Re-entered via siglongjmp(): report flag as absent. */
+}
+#endif
+
 /* Arch-dependent CPU feature detection.
*/ static uint32_t jit_cpudetect(void) { @@ -786,6 +855,21 @@ static uint32_t jit_cpudetect(void) #endif #elif LJ_TARGET_S390X /* No optional CPU features to detect (for now). */ +#elif LJ_TARGET_RISCV64 +#if LJ_HASJIT + // SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. + struct sigaction old = {0}, act = {0}; + act.sa_handler = detect_sigill; + sigaction(SIGILL, &act, &old); + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); + flags |= riscv_probe(riscv_zba, JIT_F_RVZba); + flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + sigaction(SIGILL, &old, NULL); + + // Detect V/P? + // V have no hardware available, P not ratified yet. +#endif #else #error "Missing CPU detection for this architecture" #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h index 9f7047d..3490af4 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h @@ -33,6 +33,10 @@ #define LUAJIT_ARCH_mips64 7 #define LUAJIT_ARCH_S390X 8 #define LUAJIT_ARCH_s390x 8 +#define LUAJIT_ARCH_RISCV32 9 +#define LUAJIT_ARCH_riscv32 9 +#define LUAJIT_ARCH_RISCV64 10 +#define LUAJIT_ARCH_riscv64 10 /* Target OS. 
*/ #define LUAJIT_OS_OTHER 0 @@ -69,6 +73,10 @@ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 +#elif defined(__riscv) && __riscv_xlen == 32 +#define LUAJIT_TARGET LUAJIT_ARCH_RISCV32 +#elif defined(__riscv) && __riscv_xlen == 64 +#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 #else #error "No support for this architecture (yet)" #endif @@ -451,6 +459,32 @@ #define LJ_TARGET_GC64 1 #define LJ_ARCH_NOJIT 1 /* NYI */ +#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV32 +#error "No support for RISC-V 32" + +#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 +#if defined(__riscv_float_abi_double) + +#define LJ_ARCH_NAME "riscv64" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ +#define LJ_TARGET_RISCV64 1 +#define LJ_TARGET_GC64 1 +#define LJ_TARGET_EHRETREG 0 // TODO +#define LJ_TARGET_EHRAREG 1 +#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ + AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */ +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +// for now +#define LUAJIT_NO_UNWIND 1 + +#else +#error "No support for RISC-V 64 Soft-float/Single-float" +#endif + #else #error "No target architecture defined" #endif @@ -525,6 +559,13 @@ /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. 
*/ #error "Only n64 ABI supported for MIPS64" #endif +#elif LJ_TARGET_RISCV +#if !defined(__riscv_float_abi_double) +#error "Only RISC-V 64 double float supported for now" +#endif +#if defined(__riscv_compressed) +#error "Compressed instructions not supported for now" +#endif #endif #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c index d811808..4deca17 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c +++ b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c @@ -185,6 +185,8 @@ IRFLDEF(FLOFS) #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" +#elif LJ_TARGET_RISCV64 +#include "lj_emit_riscv.h" #else #error "Missing instruction emitter for target CPU" #endif @@ -1664,6 +1666,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_mips.h" #elif LJ_TARGET_S390X #include "lj_asm_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "lj_asm_riscv64.h" #else #error "Missing assembler for target CPU" #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c index 8162b95..96bc3b8 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c +++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c @@ -686,6 +686,97 @@ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ } +#elif LJ_TARGET_RISCV64 +/* -- RISC-V lp64d calling conventions ------------------------------------ */ + +#define CCALL_HANDLE_STRUCTRET \ + /* Return structs of size > 16 by reference. 
*/ \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + unsigned int cl = ccall_classify_struct(cts, ctr); \ + if ((cl & 4) && (cl >> 8) <= 2) { \ + CTSize i = (cl >> 8) - 1; \ + do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ + } else { \ + if (cl > 1) { \ + sp = (uint8_t *)&cc->fpr[0]; \ + if ((cl >> 8) > 2) \ + sp = (uint8_t *)&cc->gpr[0]; \ + } \ + memcpy(dp, sp, ctr->size); \ + } \ + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass long double complex by reference. */ \ + if (sz == 2*sizeof(long double)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass complex in two FPRs or on stack. */ \ + else if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + sz = 2*CTSIZE_PTR; \ + } else { \ + isfp = 1; \ + sz = 2*CTSIZE_PTR; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size >16 by reference. */ \ + unsigned int cl = ccall_classify_struct(cts, d); \ + nff = cl >> 8; \ + if (sz > 16) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass struct in FPRs. */ \ + if (cl > 1) { \ + isfp = (cl & 4) ? 2 : 1; \ + } + + +#define CCALL_HANDLE_REGARG \ + if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? 
n : 2; \
+    if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else { \
+      if (ngpr + n2 <= maxgpr) { \
+        dp = &cc->gpr[ngpr]; \
+        ngpr += n2; \
+        goto done; \
+      } \
+    } \
+  } else { /* Try to pass argument in GPRs. */ \
+    if (ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -1045,6 +1136,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
 
 #endif
 
+/* -- RISC-V ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_RISCV64
+/* Returns fpsize + (nfp << 8) for an all-FP aggregate (fpsize 4 or 8, nfp <= 4), else (size <= 16). */
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+  while (ct->sib) {  /* Walk the sibling chain of ct. */
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+        r |= sct->size;  /* OR of FP sizes: mixed sizes fail the r == 4/8 test. */
+        if (!isu) n++; else if (n == 0) n = 1;  /* Union: keep the max, not the sum. */
+      } else if (ctype_iscomplex(sct->info)) {
+        r |= (sct->size >> 1);  /* A complex counts as two parts of half its size. */
+        if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+        goto substruct;
+      } else {
+        goto noth;  /* Any other field type ends the FP classification. */
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:  /* Also entered from the nested-struct field case above. */
+      if (sct->size > 0) {
+        unsigned int s = ccall_classify_struct(cts, sct);
+        if (s <= 1) goto noth;  /* Nested aggregate was not classified as FP. */
+        r |= (s & 255);  /* Low byte: FP size bits of the nested aggregate. */
+        if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)  /* Exactly one FP size, at most 4 parts. */
+    return r + (n << 8);
+noth: /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument.
*/ @@ -1091,6 +1227,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif #endif +#if LJ_TARGET_RISCV64 + int nff = 0; +#endif + /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR @@ -1265,7 +1405,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int64_t *)dp = (int64_t)*(int32_t *)dp; } #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_RISCV64 + if (isfp && d->size == sizeof(float)) + ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1305,13 +1449,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } +#elif LJ_TARGET_RISCV64 + if (isfp == 2 && nff <= 2) { + /* Split complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { + ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; + } while (i--); + } #else UNUSED(isfp); #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. 
*/ #endif cc->nsp = nsp; diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h index 5245553..29985ff 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h @@ -154,6 +154,21 @@ typedef union FPRArg { float f; } FPRArg; +#elif LJ_TARGET_RISCV64 + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -196,7 +211,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC +#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c index c1e67ab..0c3d80c 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c +++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c @@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 52 +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_MCODE_HEAD 68 + #else /* Missing support for this architecture. 
*/
@@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
   }
   return p;
 }
+#elif LJ_TARGET_RISCV64
+static void *callback_mcode_init(global_State *g, uint32_t *page)
+{
+  uint32_t *p = page;  /* Head: 17 insns (68 bytes) building target in x6 and g in x7. */
+  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+  uintptr_t ug = (uintptr_t)(void *)g;
+  uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
+  uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
+  MSize slot;
+  *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));  /* Upper 32 bits first. */
+  *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);  /* Bits 31..21. */
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);  /* Bits 20..10. */
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);  /* Bits 9..0. */
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
+  *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0);  /* Jump to x6; rd is x0. */
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {  /* Two insns per callback slot. */
+    *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);  /* Slot number goes into x5. */
+    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));  /* Byte offset back to the head at page. */
+    p++;
+  }
+  return p;
+}
 #else
 /* Missing support for this architecture. */
 #define callback_mcode_init(g, p) (p)
@@ -567,6 +604,31 @@ void lj_ccallback_mcode_free(CTState *cts)
   }
 #endif
 
+#define CALLBACK_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ((float *)dp)[1] = *(float *)dp;
+
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) {  /* FP argument: take FPRs, else fall back to GPRs. */ \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  } else {  /* Everything else: GPRs only. */ \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
 #define CALLBACK_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ((float *)dp)[1] = *(float *)dp;
@@ -735,7 +797,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
       *(int64_t *)dp = (int64_t)*(int32_t *)dp;
     }
 #endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
     if (ctr->size <= 4 &&
         (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
index ded382a..32688a0 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
@@ -56,6 +56,15 @@ static const ASMFunction dispatch_got[] = {
 #undef GOTFUNC
 #endif
 
+#if LJ_TARGET_RISCV64
+#include <math.h>
+#define GOTFUNC(name) (ASMFunction)name,
+static const ASMFunction dispatch_got[] = {  /* One entry per GOTDEF name, in GOTDEF order. */
+  GOTDEF(GOTFUNC)
+};
+#undef GOTFUNC
+#endif
+
 /* Initialize instruction dispatch table and hot counters.
*/ void lj_dispatch_init(GG_State *GG) { @@ -76,7 +85,7 @@ void lj_dispatch_init(GG_State *GG) GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); #endif } diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h index 0594af5..37407f4 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h @@ -66,6 +66,35 @@ GOTDEF(GOTENUM) }; #endif +#if LJ_TARGET_RISCV64 +/* Need our own global offset table to wrap RISC-V PIC intern / extern calls */ + +#if LJ_HASJIT +#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) +#else +#define JITGOTDEF(_) +#endif +#if LJ_HASFFI +#define FFIGOTDEF(_) \ + _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) +#else +#define FFIGOTDEF(_) +#endif + +#define GOTDEF(_) \ + _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ + _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ + _(pow) _(fmod) _(ldexp) \ + JITGOTDEF(_) FFIGOTDEF(_) + +enum { +#define GOTENUM(name) LJ_GOT_##name, +GOTDEF(GOTENUM) +#undef GOTENUM + LJ_GOT__MAX +}; +#endif + /* Type of hot counter. Must match the code in the assembler VM. */ /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ typedef uint16_t HotCount; @@ -93,7 +122,7 @@ typedef struct GG_State { /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ uint8_t align1[(16-sizeof(global_State))&15]; #endif -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 ASMFunction got[LJ_GOT__MAX]; /* Global offset table. 
*/ #endif #if LJ_HASJIT diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h index 4058311..26b8081 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h @@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ ** need to change to 3. */ #define CFRAME_SHIFT_MULTRES 0 +#elif LJ_TARGET_RISCV64 +#define CFRAME_OFS_ERRF 252 +#define CFRAME_OFS_NRES 248 +#define CFRAME_OFS_PREV 240 +#define CFRAME_OFS_L 232 +#define CFRAME_OFS_PC 224 +#define CFRAME_OFS_MULTRES 0 +#define CFRAME_SIZE 256 +#define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h index 74b40fd..23d027b 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h @@ -67,6 +67,15 @@ #endif #endif +#elif LJ_TARGET_RISCV64 + +#define JIT_F_RVC (JIT_F_CPU << 0) +#define JIT_F_RVZba (JIT_F_CPU << 1) +#define JIT_F_RVZbb (JIT_F_CPU << 2) +#define JIT_F_RVXThead (JIT_F_CPU << 3) + +#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006XThead" + #else #define JIT_F_CPUSTRING "" diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_target.h b/bundle/LuaJIT-2.1-20220411/src/lj_target.h index 3831cb6..4534567 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_target.h +++ b/bundle/LuaJIT-2.1-20220411/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. 
*/ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 typedef uint64_t RegSet; #else typedef uint32_t RegSet; @@ -69,7 +69,7 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) #else @@ -146,6 +146,8 @@ typedef uint32_t RegCost; #include "lj_target_mips.h" #elif LJ_TARGET_S390X #include "lj_target_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "lj_target_riscv.h" #else #error "Missing include for target CPU" #endif diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c index b6cc60b..2186621 100644 --- a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c +++ b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c @@ -58,7 +58,8 @@ double lj_vm_foldarith(double x, double y, int op) /* -- Helper functions for generated machine code ------------------------- */ -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ + || LJ_TARGET_RISCV64 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub;