summaryrefslogtreecommitdiff
path: root/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
diff options
context:
space:
mode:
Diffstat (limited to 'LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch')
-rw-r--r--LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch626
1 files changed, 626 insertions, 0 deletions
diff --git a/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch b/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
new file mode 100644
index 0000000..35acd20
--- /dev/null
+++ b/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
@@ -0,0 +1,626 @@
+From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001
+From: caiyinyu <caiyinyu@loongson.cn>
+Date: Fri, 8 Sep 2023 14:10:55 +0800
+Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for
+ _dl_runtime_profile.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ sysdeps/loongarch/bits/link.h | 24 ++-
+ sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++
+ sysdeps/loongarch/dl-audit-check.h | 23 +++
+ sysdeps/loongarch/dl-link.sym | 8 +-
+ sysdeps/loongarch/dl-machine.h | 11 +-
+ sysdeps/loongarch/dl-trampoline.S | 177 +----------------
+ sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++
+ 7 files changed, 331 insertions(+), 179 deletions(-)
+ create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h
+ create mode 100644 sysdeps/loongarch/dl-audit-check.h
+
+diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h
+index 7fa61312..00f6f25f 100644
+--- a/sysdeps/loongarch/bits/link.h
++++ b/sysdeps/loongarch/bits/link.h
+@@ -20,10 +20,26 @@
+ #error "Never include <bits/link.h> directly; use <link.h> instead."
+ #endif
+
++#ifndef __loongarch_soft_float
++typedef float La_loongarch_vr
++ __attribute__ ((__vector_size__ (16), __aligned__ (16)));
++typedef float La_loongarch_xr
++ __attribute__ ((__vector_size__ (32), __aligned__ (16)));
++
++typedef union
++{
++ double fpreg[4];
++ La_loongarch_vr vr[2];
++ La_loongarch_xr xr[1];
++} La_loongarch_vector __attribute__ ((__aligned__ (16)));
++#endif
++
+ typedef struct La_loongarch_regs
+ {
+ unsigned long int lr_reg[8]; /* a0 - a7 */
+- double lr_fpreg[8]; /* fa0 - fa7 */
++#ifndef __loongarch_soft_float
++ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/
++#endif
+ unsigned long int lr_ra;
+ unsigned long int lr_sp;
+ } La_loongarch_regs;
+@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval
+ {
+ unsigned long int lrv_a0;
+ unsigned long int lrv_a1;
+- double lrv_fa0;
+- double lrv_fa1;
++#ifndef __loongarch_soft_float
++ La_loongarch_vector lrv_vec0;
++ La_loongarch_vector lrv_vec1;
++#endif
+ } La_loongarch_retval;
+
+ __BEGIN_DECLS
+diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h
+new file mode 100644
+index 00000000..15f1eb84
+--- /dev/null
++++ b/sysdeps/loongarch/bits/link_lavcurrent.h
+@@ -0,0 +1,25 @@
++/* Data structure for communication from the run-time dynamic linker for
++ loaded ELF shared objects. LAV_CURRENT definition.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#ifndef _LINK_H
++# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
++#endif
++
++/* Version numbers for la_version handshake interface. */
++#define LAV_CURRENT 3
+diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h
+new file mode 100644
+index 00000000..a139c939
+--- /dev/null
++++ b/sysdeps/loongarch/dl-audit-check.h
+@@ -0,0 +1,23 @@
++/* rtld-audit version check. LoongArch version.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++static inline bool
++_dl_audit_check_version (unsigned int lav)
++{
++ return lav == LAV_CURRENT;
++}
+diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
+index 868ab7c6..b534968e 100644
+--- a/sysdeps/loongarch/dl-link.sym
++++ b/sysdeps/loongarch/dl-link.sym
+@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
+ DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
+
+ DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
+-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
++#ifndef __loongarch_soft_float
++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec)
++#endif
+ DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
+ DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
+
+ DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
+-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
++#ifndef __loongarch_soft_float
++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0)
++#endif
+diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
+index 066bb233..8a2db9de 100644
+--- a/sysdeps/loongarch/dl-machine.h
++++ b/sysdeps/loongarch/dl-machine.h
+@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ #if !defined __loongarch_soft_float
+ extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
+ extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
++ extern void _dl_runtime_profile_lasx (void) attribute_hidden;
++ extern void _dl_runtime_profile_lsx (void) attribute_hidden;
+ #endif
+ extern void _dl_runtime_resolve (void) attribute_hidden;
+ extern void _dl_runtime_profile (void) attribute_hidden;
+@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ end in this function. */
+ if (profile != 0)
+ {
+- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
++#if !defined __loongarch_soft_float
++ if (SUPPORT_LASX)
++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
++ else if (SUPPORT_LSX)
++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
++ else
++#endif
++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
+
+ if (GLRO(dl_profile) != NULL
+ && _dl_name_match_p (GLRO(dl_profile), l))
+diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
+index 8fd91469..bb449ecf 100644
+--- a/sysdeps/loongarch/dl-trampoline.S
++++ b/sysdeps/loongarch/dl-trampoline.S
+@@ -22,190 +22,21 @@
+ #if !defined __loongarch_soft_float
+ #define USE_LASX
+ #define _dl_runtime_resolve _dl_runtime_resolve_lasx
++#define _dl_runtime_profile _dl_runtime_profile_lasx
+ #include "dl-trampoline.h"
+ #undef FRAME_SIZE
+ #undef USE_LASX
+ #undef _dl_runtime_resolve
++#undef _dl_runtime_profile
+
+ #define USE_LSX
+ #define _dl_runtime_resolve _dl_runtime_resolve_lsx
++#define _dl_runtime_profile _dl_runtime_profile_lsx
+ #include "dl-trampoline.h"
+ #undef FRAME_SIZE
+ #undef USE_LSX
+ #undef _dl_runtime_resolve
++#undef _dl_runtime_profile
+ #endif
+
+ #include "dl-trampoline.h"
+-
+-#include "dl-link.h"
+-
+-ENTRY (_dl_runtime_profile)
+- /* LoongArch we get called with:
+- t0 linkr_map pointer
+- t1 the scaled offset stored in t0, which can be used
+- to calculate the offset of the current symbol in .rela.plt
+- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
+- t3 dl resolver entry point, no use in this function
+-
+- Stack frame layout:
+- [sp, #96] La_loongarch_regs
+- [sp, #48] La_loongarch_retval
+- [sp, #40] frame size return from pltenter
+- [sp, #32] dl_profile_call saved a1
+- [sp, #24] dl_profile_call saved a0
+- [sp, #16] T1
+- [sp, #0] ra, fp <- fp
+- */
+-
+-# define OFFSET_T1 16
+-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
+-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
+-# define OFFSET_RV OFFSET_FS + 8
+-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
+-
+-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
+-
+- /* Save arguments to stack. */
+- ADDI sp, sp, -SF_SIZE
+- REG_S ra, sp, 0
+- REG_S fp, sp, 8
+-
+- or fp, sp, zero
+-
+- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+-
+-#ifndef __loongarch_soft_float
+- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
+- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
+- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
+- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
+- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
+- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
+- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
+- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
+-#endif
+-
+- /* Update .got.plt and obtain runtime address of callee. */
+- SLLI a1, t1, 1
+- or a0, t0, zero
+- ADD a1, a1, t1
+- or a2, ra, zero /* return addr */
+- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
+- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
+-
+- REG_S a0, fp, OFFSET_SAVED_CALL_A0
+- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+-
+- la t2, _dl_profile_fixup
+- jirl ra, t2, 0
+-
+- REG_L t3, fp, OFFSET_FS
+- bge t3, zero, 1f
+-
+- /* Save the return. */
+- or t4, v0, zero
+-
+- /* Restore arguments from stack. */
+- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+-
+-#ifndef __loongarch_soft_float
+- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
+- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
+- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
+- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
+- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
+- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
+- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
+- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
+-#endif
+-
+- REG_L ra, fp, 0
+- REG_L fp, fp, SZREG
+-
+- ADDI sp, sp, SF_SIZE
+- jirl zero, t4, 0
+-
+-1:
+- /* The new frame size is in t3. */
+- SUB sp, fp, t3
+- BSTRINS sp, zero, 3, 0
+-
+- REG_S a0, fp, OFFSET_T1
+-
+- or a0, sp, zero
+- ADDI a1, fp, SF_SIZE
+- or a2, t3, zero
+- la t5, memcpy
+- jirl ra, t5, 0
+-
+- REG_L t6, fp, OFFSET_T1
+-
+- /* Call the function. */
+- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
+- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
+- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
+- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
+- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
+- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
+- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
+- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
+-
+-#ifndef __loongarch_soft_float
+- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
+- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
+- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
+- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
+- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
+- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
+- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
+- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
+-#endif
+- jirl ra, t6, 0
+-
+- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
+- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
+-
+-#ifndef __loongarch_soft_float
+- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
+- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
+-#endif
+-
+- /* Setup call to pltexit. */
+- REG_L a0, fp, OFFSET_SAVED_CALL_A0
+- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
+- ADDI a2, fp, OFFSET_RG
+- ADDI a3, fp, OFFSET_RV
+- la t7, _dl_audit_pltexit
+- jirl ra, t7, 0
+-
+- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
+- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
+-
+-#ifndef __loongarch_soft_float
+- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
+- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
+-#endif
+-
+- /* RA from within La_loongarch_reg. */
+- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
+- or sp, fp, zero
+- ADDI sp, sp, SF_SIZE
+- REG_S fp, fp, SZREG
+-
+- jirl zero, ra, 0
+-
+-END (_dl_runtime_profile)
+diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
+index 99fcacab..e298439d 100644
+--- a/sysdeps/loongarch/dl-trampoline.h
++++ b/sysdeps/loongarch/dl-trampoline.h
+@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve)
+ /* Invoke the callee. */
+ jirl zero, t1, 0
+ END (_dl_runtime_resolve)
++
++#include "dl-link.h"
++
++ENTRY (_dl_runtime_profile)
++ /* LoongArch we get called with:
++ t0 linkr_map pointer
++ t1 the scaled offset stored in t0, which can be used
++ to calculate the offset of the current symbol in .rela.plt
++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
++ t3 dl resolver entry point, no use in this function
++
++ Stack frame layout:
++ [sp, #208] La_loongarch_regs
++ [sp, #128] La_loongarch_retval // align: 16
++ [sp, #112] frame size return from pltenter
++ [sp, #80 ] dl_profile_call saved vec1
++ [sp, #48 ] dl_profile_call saved vec0 // align: 16
++ [sp, #32 ] dl_profile_call saved a1
++ [sp, #24 ] dl_profile_call saved a0
++ [sp, #16 ] T1
++ [sp, #0 ] ra, fp <- fp
++ */
++
++# define OFFSET_T1 16
++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64
++# define OFFSET_RV OFFSET_FS + 8 + 8
++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
++
++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
++
++ /* Save arguments to stack. */
++ ADDI sp, sp, -SF_SIZE
++ REG_S ra, sp, 0
++ REG_S fp, sp, 8
++
++ or fp, sp, zero
++
++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
++
++#ifdef USE_LASX
++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
++#elif defined USE_LSX
++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
++#elif !defined __loongarch_soft_float
++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
++#endif
++
++ /* Update .got.plt and obtain runtime address of callee. */
++ SLLI a1, t1, 1
++ or a0, t0, zero
++ ADD a1, a1, t1
++ or a2, ra, zero /* return addr */
++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
++
++ REG_S a0, fp, OFFSET_SAVED_CALL_A0
++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
++
++ la t2, _dl_profile_fixup
++ jirl ra, t2, 0
++
++ REG_L t3, fp, OFFSET_FS
++ bge t3, zero, 1f
++
++ /* Save the return. */
++ or t4, v0, zero
++
++ /* Restore arguments from stack. */
++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
++
++#ifdef USE_LASX
++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
++#elif defined USE_LSX
++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
++#elif !defined __loongarch_soft_float
++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
++#endif
++
++ REG_L ra, fp, 0
++ REG_L fp, fp, SZREG
++
++ ADDI sp, sp, SF_SIZE
++ jirl zero, t4, 0
++
++1:
++ /* The new frame size is in t3. */
++ SUB sp, fp, t3
++ BSTRINS sp, zero, 3, 0
++
++ REG_S a0, fp, OFFSET_T1
++
++ or a0, sp, zero
++ ADDI a1, fp, SF_SIZE
++ or a2, t3, zero
++ la t5, memcpy
++ jirl ra, t5, 0
++
++ REG_L t6, fp, OFFSET_T1
++
++ /* Call the function. */
++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
++
++#ifdef USE_LASX
++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG
++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG
++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG
++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG
++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG
++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG
++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG
++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG
++#elif defined USE_LSX
++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG
++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG
++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG
++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG
++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG
++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG
++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG
++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG
++#elif !defined __loongarch_soft_float
++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG
++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG
++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG
++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG
++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG
++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG
++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG
++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG
++#endif
++
++ jirl ra, t6, 0
++
++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
++
++#ifdef USE_LASX
++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
++#elif defined USE_LSX
++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
++#elif !defined __loongarch_soft_float
++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
++#endif
++
++ /* Setup call to pltexit. */
++ REG_L a0, fp, OFFSET_SAVED_CALL_A0
++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
++ ADDI a2, fp, OFFSET_RG
++ ADDI a3, fp, OFFSET_RV
++ la t7, _dl_audit_pltexit
++ jirl ra, t7, 0
++
++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
++
++#ifdef USE_LASX
++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG
++#elif defined USE_LSX
++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG
++#elif !defined __loongarch_soft_float
++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0
++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG
++#endif
++
++ /* RA from within La_loongarch_reg. */
++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
++ or sp, fp, zero
++ ADDI sp, sp, SF_SIZE
++ REG_S fp, fp, SZREG
++
++ jirl zero, ra, 0
++
++END (_dl_runtime_profile)
+--
+2.33.0
+