summaryrefslogtreecommitdiff
path: root/0002-add-riscv-support-not-upstream-modified-files.patch
blob: 05e842e1fc2bcd30e0cf37bc1c13a609dcc47305 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
From: gns <infiwang@proton.me>
From: Jingwiw <wangjingwei@iscas.ac.cn>
Subject: [PATCH] riscv64: add initial support for riscv64

Add RISC-V 64 (rv64g) platform support with lp64d ABI to LuaJIT (not upstreamed yet).
Part1: modified files

diff --git a/bundle/LuaJIT-2.1-20220411/Makefile b/bundle/LuaJIT-2.1-20220411/Makefile
index 6fb8efa..c81a448 100644
--- a/bundle/LuaJIT-2.1-20220411/Makefile
+++ b/bundle/LuaJIT-2.1-20220411/Makefile
@@ -86,6 +86,7 @@ FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+	      dis_riscv.lua dis_riscv64.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
 	      dis_mips64.lua dis_mips64el.lua vmdef.lua
diff --git a/bundle/LuaJIT-2.1-20220411/src/Makefile b/bundle/LuaJIT-2.1-20220411/src/Makefile
index 68a9a7c..e1b7464 100644
--- a/bundle/LuaJIT-2.1-20220411/src/Makefile
+++ b/bundle/LuaJIT-2.1-20220411/src/Makefile
@@ -53,6 +53,7 @@ CCOPT_arm=
 CCOPT_arm64=
 CCOPT_ppc=
 CCOPT_mips=
+CCOPT_riscv64=
 #
 #CCDEBUG=
 # Uncomment the next line to generate debug information:
@@ -270,6 +271,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   else
     TARGET_LJARCH= mips
   endif
+else
+ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= riscv32
+else
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= riscv64
 else
   $(error Unsupported target architecture)
 endif
@@ -279,6 +286,8 @@ endif
 endif
 endif
 endif
+endif
+endif
 
 ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
@@ -454,6 +463,12 @@ else
 ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D MIPSR6
 endif
+ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D RISCV32
+endif
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D RISCV64
+endif
 ifeq (ppc,$(TARGET_LJARCH))
   ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D SQRT
diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c
index 4efda1b..def4a92 100644
--- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c
+++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm.c
@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_mips.h"
 #elif LJ_TARGET_S390X
 #include "../dynasm/dasm_s390x.h"
+#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64
+#include "../dynasm/dasm_riscv.h"
 #else
 #error "No support for this architecture (yet)"
 #endif
diff --git a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c
index e73f9b1..0a187d5 100644
--- a/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c
+++ b/bundle/LuaJIT-2.1-20220411/src/host/buildvm_asm.c
@@ -208,6 +208,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
 	  "Error: unsupported opcode %08x for %s symbol relocation.\n",
 	  ins, sym);
   exit(1);
+#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64
+  if ((ins & 0x7f) == 0x6fu) {
+    fprintf(ctx->fp, "\tjal %s\n", sym);
+  } else {
+    fprintf(stderr,
+  	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+  	    ins, sym);
+    exit(1);
+  }
 #else
 #error "missing relocation support for this architecture"
 #endif
@@ -327,6 +336,9 @@ void emit_asm(BuildCtx *ctx)
 #if LJ_TARGET_MIPS
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
+#if LJ_TARGET_RISCV64
+  fprintf(ctx->fp, ".option arch, -c\n.option norelax\n");
+#endif
 
   for (i = rel = 0; i < ctx->nsym; i++) {
     int32_t ofs = ctx->sym[i].ofs;
diff --git a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua
index 7cb23f1..c5e22f6 100644
--- a/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua
+++ b/bundle/LuaJIT-2.1-20220411/src/jit/bcsave.lua
@@ -85,6 +85,7 @@ local map_arch = {
   mips64el =	{ e = "le", b = 64, m = 8, f = 0x80000007, },
   mips64r6 =	{ e = "be", b = 64, m = 8, f = 0xa0000407, },
   mips64r6el =	{ e = "le", b = 64, m = 8, f = 0xa0000407, },
+  riscv64 =    { e = "le", b = 64, m = 243, f = 0x00000004, },
 }
 
 local map_os = {
diff --git a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
index 50c2b13..11748a4 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lib_jit.c
@@ -714,6 +714,75 @@ JIT_PARAMDEF(JIT_PARAMINIT)
 #include <sys/utsname.h>
 #endif
 
+#if LJ_TARGET_RISCV64
+#include <setjmp.h>
+#include <signal.h>
+static sigjmp_buf sigbuf = {0};
+static void detect_sigill(int sig)
+{
+  siglongjmp(sigbuf, 1);
+}
+
+static int riscv_compressed()
+{
+#if defined(__riscv_compressed)
+  // Don't bother checking for RVC -- would crash before getting here.
+  return 1;
+#elif defined(__GNUC__)
+  // c.nop; c.nop;
+  __asm__(".4byte 0x00010001");
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_zba()
+{
+#if defined(__GNUC__)
+  // Don't bother verifying the result, just check if the instruction exists.
+  // add.uw zero, zero, zero
+  __asm__(".4byte 0x0800003b");
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_zbb()
+{
+#if defined(__GNUC__)
+  register int t asm ("a0");
+  // addi a0, zero, 255; sext.b a0, a0;
+  __asm__("addi a0, zero, 255\n\t.4byte 0x60451513");
+  return t < 0;
+#else
+  return 0;
+#endif
+}
+
+static int riscv_xthead()
+{
+#if defined(__GNUC__)
+    register int t asm ("a0");
+    // C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc".
+    // Therefore assume XThead* are present if XTheadBb is present.
+    // addi a0, zero, 255; th.ext a0, a0, 7, 0;
+    __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b");
+    return t == -1;		// In case of collision with other vendor extensions.
+#else
+    return 0;
+#endif
+}
+
+static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
+{
+    if (sigsetjmp(sigbuf, 1) == 0) {
+        return func() ? flag : 0;
+    } else return 0;
+}
+#endif
+
 /* Arch-dependent CPU feature detection. */
 static uint32_t jit_cpudetect(void)
 {
@@ -786,6 +855,21 @@ static uint32_t jit_cpudetect(void)
 #endif
 #elif LJ_TARGET_S390X
   /* No optional CPU features to detect (for now). */
+#elif LJ_TARGET_RISCV64
+#if LJ_HASJIT
+  // SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future.
+  struct sigaction old = {0}, act = {0};
+  act.sa_handler = detect_sigill;
+  sigaction(SIGILL, &act, &old);
+  flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
+  flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
+  flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
+  flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
+  sigaction(SIGILL, &old, NULL);
+
+  // Detect V/P?
+  // V have no hardware available, P not ratified yet.
+#endif
 #else
 #error "Missing CPU detection for this architecture"
 #endif
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h
index 9f7047d..3490af4 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_arch.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_arch.h
@@ -33,6 +33,10 @@
 #define LUAJIT_ARCH_mips64	7
 #define LUAJIT_ARCH_S390X	8
 #define LUAJIT_ARCH_s390x	8
+#define LUAJIT_ARCH_RISCV32	9
+#define LUAJIT_ARCH_riscv32	9
+#define LUAJIT_ARCH_RISCV64	10
+#define LUAJIT_ARCH_riscv64	10
 
 /* Target OS. */
 #define LUAJIT_OS_OTHER		0
@@ -69,6 +73,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS64
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS32
+#elif defined(__riscv) && __riscv_xlen == 32
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV32
+#elif defined(__riscv) && __riscv_xlen == 64
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
 #else
 #error "No support for this architecture (yet)"
 #endif
@@ -451,6 +459,32 @@
 #define LJ_TARGET_GC64		1
 #define LJ_ARCH_NOJIT		1	/* NYI */
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV32
+#error "No support for RISC-V 32"
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
+#if defined(__riscv_float_abi_double)
+
+#define LJ_ARCH_NAME		"riscv64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE	/* Forget about BE for now */
+#define LJ_TARGET_RISCV64	1
+#define LJ_TARGET_GC64		1
+#define LJ_TARGET_EHRETREG	0 // TODO
+#define LJ_TARGET_EHRAREG	1
+#define LJ_TARGET_JUMPRANGE	30	/* JAL +-2^20 = +-1MB,\
+        AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR, no ROLI */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+// for now
+#define LUAJIT_NO_UNWIND	1
+
+#else
+#error "No support for RISC-V 64 Soft-float/Single-float"
+#endif
+
 #else
 #error "No target architecture defined"
 #endif
@@ -525,6 +559,13 @@
 /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
 #error "Only n64 ABI supported for MIPS64"
 #endif
+#elif LJ_TARGET_RISCV
+#if !defined(__riscv_float_abi_double)
+#error "Only RISC-V 64 double float supported for now"
+#endif
+#if defined(__riscv_compressed)
+#error "Compressed instructions not supported for now"
+#endif
 #endif
 #endif
 
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c
index d811808..4deca17 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_asm.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_asm.c
@@ -185,6 +185,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_emit_mips.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_emit_riscv.h"
 #else
 #error "Missing instruction emitter for target CPU"
 #endif
@@ -1664,6 +1666,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_mips.h"
 #elif LJ_TARGET_S390X
 #include "lj_asm_s390x.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_asm_riscv64.h"
 #else
 #error "Missing assembler for target CPU"
 #endif
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c
index 8162b95..96bc3b8 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.c
@@ -686,6 +686,97 @@
     if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
   }
 
+#elif LJ_TARGET_RISCV64
+/* -- RISC-V lp64d calling conventions ------------------------------------ */
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size > 16 by reference. */ \
+  cc->retref = !(sz <= 16); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4) && (cl >> 8) <= 2) { \
+    CTSize i = (cl >> 8) - 1; \
+    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
+  } else { \
+    if (cl > 1) { \
+      sp = (uint8_t *)&cc->fpr[0]; \
+      if ((cl >> 8) > 2) \
+        sp = (uint8_t *)&cc->gpr[0]; \
+    } \
+      memcpy(dp, sp, ctr->size); \
+  } \
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 1 or 2 FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass long double complex by reference. */ \
+  if (sz == 2*sizeof(long double)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass complex in two FPRs or on stack. */ \
+  else if (sz == 2*sizeof(float)) { \
+    isfp = 2; \
+    sz = 2*CTSIZE_PTR; \
+  } else { \
+    isfp = 1; \
+    sz = 2*CTSIZE_PTR; \
+  }
+
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)&cc->fpr[0].f;
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass structs of size >16 by reference. */ \
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  nff = cl >> 8; \
+  if (sz > 16) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass struct in FPRs. */ \
+  if (cl > 1) { \
+    isfp = (cl & 4) ? 2 : 1; \
+  }
+
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp && (!isva)) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : \
+            isfp == 1 ? n : 2; \
+    if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else { \
+      if (ngpr + n2 <= maxgpr) { \
+       dp = &cc->gpr[ngpr]; \
+       ngpr += n2; \
+       goto done; \
+      } \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+      if (ngpr + n <= maxgpr) { \
+        dp = &cc->gpr[ngpr]; \
+        ngpr += n; \
+        goto done; \
+    } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -1045,6 +1136,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
 
 #endif
 
+/* -- RISC-V ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_RISCV64
+
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+  while (ct->sib) {
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;
+	if (!isu) n++; else if (n == 0) n = 1;
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);
+	if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+	unsigned int s = ccall_classify_struct(cts, sct);
+	if (s <= 1) goto noth;
+	r |= (s & 255);
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument. */
@@ -1091,6 +1227,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 #endif
 #endif
 
+#if LJ_TARGET_RISCV64
+  int nff = 0;
+#endif
+
   /* Clear unused regs to get some determinism in case of misdeclaration. */
   memset(cc->gpr, 0, sizeof(cc->gpr));
 #if CCALL_NUM_FPR
@@ -1265,7 +1405,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
         *(int64_t *)dp = (int64_t)*(int32_t *)dp;
     }
 #endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_RISCV64
+    if (isfp && d->size == sizeof(float))
+      ((uint32_t *)dp)[1] = 0xffffffffu;  /* Float NaN boxing */
+#endif
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
 #if LJ_TARGET_MIPS64
 	 || (isfp && nsp == 0)
@@ -1305,13 +1449,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
       CTSize i = (sz >> 2) - 1;
       do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
     }
+#elif LJ_TARGET_RISCV64
+    if (isfp == 2 && nff <= 2) {
+      /* Split complex float into separate registers. */
+      CTSize i = (sz >> 2) - 1;
+      do {
+        ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
+      } while (i--);
+    }
 #else
     UNUSED(isfp);
 #endif
   }
   if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
 
-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
   cc->nfpr = nfpr;  /* Required for vararg functions. */
 #endif
   cc->nsp = nsp;
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h
index 5245553..29985ff 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccall.h
@@ -154,6 +154,21 @@ typedef union FPRArg {
   float f;
 } FPRArg;
 
+#elif LJ_TARGET_RISCV64
+
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		2
+#define CCALL_NRET_FPR		2
+#define CCALL_SPS_EXTRA		3
+#define CCALL_SPS_FREE		1
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+} FPRArg;
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -196,7 +211,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
 #elif LJ_TARGET_ARM64
   void *retp;			/* Aggregate return pointer in x8. */
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #endif
 #if LJ_32
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c
index c1e67ab..0c3d80c 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_ccallback.c
@@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 #define CALLBACK_MCODE_HEAD		52
 
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_MCODE_HEAD		68
+
 #else
 
 /* Missing support for this architecture. */
@@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
   }
   return p;
 }
+#elif LJ_TARGET_RISCV64
+static void *callback_mcode_init(global_State *g, uint32_t *page)
+{
+  uint32_t *p = page;
+  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+  uintptr_t ug = (uintptr_t)(void *)g;
+  uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
+  uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
+  MSize slot;
+  *p++ = RISCVI_LUI  | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));
+  *p++ = RISCVI_LUI  | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
+  *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0);
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+    *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
+    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));
+    p++;
+  }
+  return p;
+}
 #else
 /* Missing support for this architecture. */
 #define callback_mcode_init(g, p)	(p)
@@ -567,6 +604,31 @@ void lj_ccallback_mcode_free(CTState *cts)
   }
 #endif
 
+#define CALLBACK_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ((float *)dp)[1] = *(float *)dp;
+
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  } else { \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
 #define CALLBACK_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ((float *)dp)[1] = *(float *)dp;
@@ -735,7 +797,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
         *(int64_t *)dp = (int64_t)*(int32_t *)dp;
     }
 #endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
     if (ctr->size <= 4 &&
 	(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
index ded382a..32688a0 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.c
@@ -56,6 +56,15 @@ static const ASMFunction dispatch_got[] = {
 #undef GOTFUNC
 #endif
 
+#if LJ_TARGET_RISCV64
+#include <math.h>
+#define GOTFUNC(name)	(ASMFunction)name,
+static const ASMFunction dispatch_got[] = {
+  GOTDEF(GOTFUNC)
+};
+#undef GOTFUNC
+#endif
+
 /* Initialize instruction dispatch table and hot counters. */
 void lj_dispatch_init(GG_State *GG)
 {
@@ -76,7 +85,7 @@ void lj_dispatch_init(GG_State *GG)
   GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0);
   for (i = 0; i < GG_NUM_ASMFF; i++)
     GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
-#if LJ_TARGET_MIPS
+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64
   memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
 #endif
 }
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h
index 0594af5..37407f4 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_dispatch.h
@@ -66,6 +66,35 @@ GOTDEF(GOTENUM)
 };
 #endif
 
+#if LJ_TARGET_RISCV64
+/* Need our own global offset table to wrap RISC-V PIC intern / extern calls */
+
+#if LJ_HASJIT
+#define JITGOTDEF(_)	_(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
+#else
+#define JITGOTDEF(_)
+#endif
+#if LJ_HASFFI
+#define FFIGOTDEF(_) \
+  _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave)
+#else
+#define FFIGOTDEF(_)
+#endif
+
+#define GOTDEF(_) \
+  _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
+  _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
+  _(pow) _(fmod) _(ldexp) \
+  JITGOTDEF(_) FFIGOTDEF(_)
+
+enum {
+#define GOTENUM(name) LJ_GOT_##name,
+GOTDEF(GOTENUM)
+#undef GOTENUM
+  LJ_GOT__MAX
+};
+#endif
+
 /* Type of hot counter. Must match the code in the assembler VM. */
 /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */
 typedef uint16_t HotCount;
@@ -93,7 +122,7 @@ typedef struct GG_State {
   /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
   uint8_t align1[(16-sizeof(global_State))&15];
 #endif
-#if LJ_TARGET_MIPS
+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64
   ASMFunction got[LJ_GOT__MAX];		/* Global offset table. */
 #endif
 #if LJ_HASJIT
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h
index 4058311..26b8081 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_frame.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_frame.h
@@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 ** need to change to 3.
 */
 #define CFRAME_SHIFT_MULTRES	0
+#elif LJ_TARGET_RISCV64
+#define CFRAME_OFS_ERRF		252
+#define CFRAME_OFS_NRES		248
+#define CFRAME_OFS_PREV		240
+#define CFRAME_OFS_L		232
+#define CFRAME_OFS_PC		224
+#define CFRAME_OFS_MULTRES	0
+#define CFRAME_SIZE		256
+#define CFRAME_SHIFT_MULTRES	3
 #else
 #error "Missing CFRAME_* definitions for this architecture"
 #endif
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h
index 74b40fd..23d027b 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_jit.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_jit.h
@@ -67,6 +67,15 @@
 #endif
 #endif
 
+#elif LJ_TARGET_RISCV64
+
+#define JIT_F_RVC		(JIT_F_CPU << 0)
+#define JIT_F_RVZba		(JIT_F_CPU << 1)
+#define JIT_F_RVZbb		(JIT_F_CPU << 2)
+#define JIT_F_RVXThead		(JIT_F_CPU << 3)
+
+#define JIT_F_CPUSTRING		"\003RVC\003Zba\003Zbb\006XThead"
+
 #else
 
 #define JIT_F_CPUSTRING		""
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_target.h b/bundle/LuaJIT-2.1-20220411/src/lj_target.h
index 3831cb6..4534567 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_target.h
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_target.h
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
 /* Bitset for registers. 32 registers suffice for most architectures.
 ** Note that one set holds bits for both GPRs and FPRs.
 */
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
 typedef uint64_t RegSet;
 #else
 typedef uint32_t RegSet;
@@ -69,7 +69,7 @@ typedef uint32_t RegSet;
 #define rset_set(rs, r)		(rs |= RID2RSET(r))
 #define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
 #define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
 #define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
 #define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
 #else
@@ -146,6 +146,8 @@ typedef uint32_t RegCost;
 #include "lj_target_mips.h"
 #elif LJ_TARGET_S390X
 #include "lj_target_s390x.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_target_riscv.h"
 #else
 #error "Missing include for target CPU"
 #endif
diff --git a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c
index b6cc60b..2186621 100644
--- a/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c
+++ b/bundle/LuaJIT-2.1-20220411/src/lj_vmmath.c
@@ -58,7 +58,8 @@ double lj_vm_foldarith(double x, double y, int op)
 
 /* -- Helper functions for generated machine code ------------------------- */
 
-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
+ || LJ_TARGET_RISCV64
 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
 {
   uint32_t y, ua, ub;