diff options
author | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
commit | db43dfdfa8bc2b938582aef3d87e43594c13ee50 (patch) | |
tree | 47b95b2f6ac8d8b7e6fa373a5bd7d661bf7234df /LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch | |
parent | b933872de72b006230559f77acc3ccfb38a1f343 (diff) |
automatic import of glibcopeneuler20.03
Diffstat (limited to 'LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch')
-rw-r--r-- | LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch | 626 |
1 files changed, 626 insertions, 0 deletions
diff --git a/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch b/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch new file mode 100644 index 0000000..35acd20 --- /dev/null +++ b/LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch @@ -0,0 +1,626 @@ +From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 8 Sep 2023 14:10:55 +0800 +Subject: [PATCH 20/29] LoongArch: Add lasx/lsx support for + _dl_runtime_profile. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/bits/link.h | 24 ++- + sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++ + sysdeps/loongarch/dl-audit-check.h | 23 +++ + sysdeps/loongarch/dl-link.sym | 8 +- + sysdeps/loongarch/dl-machine.h | 11 +- + sysdeps/loongarch/dl-trampoline.S | 177 +---------------- + sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++ + 7 files changed, 331 insertions(+), 179 deletions(-) + create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h + create mode 100644 sysdeps/loongarch/dl-audit-check.h + +diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h +index 7fa61312..00f6f25f 100644 +--- a/sysdeps/loongarch/bits/link.h ++++ b/sysdeps/loongarch/bits/link.h +@@ -20,10 +20,26 @@ + #error "Never include <bits/link.h> directly; use <link.h> instead." + #endif + ++#ifndef __loongarch_soft_float ++typedef float La_loongarch_vr ++ __attribute__ ((__vector_size__ (16), __aligned__ (16))); ++typedef float La_loongarch_xr ++ __attribute__ ((__vector_size__ (32), __aligned__ (16))); ++ ++typedef union ++{ ++ double fpreg[4]; ++ La_loongarch_vr vr[2]; ++ La_loongarch_xr xr[1]; ++} La_loongarch_vector __attribute__ ((__aligned__ (16))); ++#endif ++ + typedef struct La_loongarch_regs + { + unsigned long int lr_reg[8]; /* a0 - a7 */ +- double lr_fpreg[8]; /* fa0 - fa7 */ ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ ++#endif + unsigned long int lr_ra; + unsigned long int lr_sp; + } La_loongarch_regs; +@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval + { + unsigned long int lrv_a0; + unsigned long int lrv_a1; +- double lrv_fa0; +- double lrv_fa1; ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lrv_vec0; ++ La_loongarch_vector lrv_vec1; ++#endif + } La_loongarch_retval; + + __BEGIN_DECLS +diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h +new file mode 100644 +index 00000000..15f1eb84 +--- /dev/null ++++ b/sysdeps/loongarch/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _LINK_H ++# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. */ ++#define LAV_CURRENT 3 +diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h +new file mode 100644 +index 00000000..a139c939 +--- /dev/null ++++ b/sysdeps/loongarch/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym +index 868ab7c6..b534968e 100644 +--- a/sysdeps/loongarch/dl-link.sym ++++ b/sysdeps/loongarch/dl-link.sym +@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) + DL_SIZEOF_RV sizeof(struct La_loongarch_retval) + + DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) +-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) ++#endif + DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) + DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) + + DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) +-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) ++#endif +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 066bb233..8a2db9de 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + #if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lasx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lsx (void) attribute_hidden; + #endif + extern void _dl_runtime_resolve (void) attribute_hidden; + extern void _dl_runtime_profile (void) attribute_hidden; +@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + end in this function. */ + if (profile != 0) + { +- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; ++ else if (SUPPORT_LSX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; ++ else ++#endif ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index 8fd91469..bb449ecf 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -22,190 +22,21 @@ + #if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx ++#define _dl_runtime_profile _dl_runtime_profile_lasx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LASX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + + #define USE_LSX + #define _dl_runtime_resolve _dl_runtime_resolve_lsx ++#define _dl_runtime_profile _dl_runtime_profile_lsx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LSX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + #endif + + #include "dl-trampoline.h" +- +-#include "dl-link.h" +- +-ENTRY (_dl_runtime_profile) +- /* LoongArch we get called with: +- t0 linkr_map pointer +- t1 the scaled offset stored in t0, which can be used +- to calculate the offset of the current symbol in .rela.plt +- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function +- t3 dl resolver entry point, no use in this function +- +- Stack frame layout: +- [sp, #96] La_loongarch_regs +- [sp, #48] La_loongarch_retval +- [sp, #40] frame size return from pltenter +- [sp, #32] dl_profile_call saved a1 +- [sp, #24] dl_profile_call saved a0 +- [sp, #16] T1 +- [sp, #0] ra, fp <- fp +- */ +- +-# define OFFSET_T1 16 +-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 +-# define OFFSET_RV OFFSET_FS + 8 +-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +- +-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) +- +- /* Save arguments to stack. */ +- ADDI sp, sp, -SF_SIZE +- REG_S ra, sp, 0 +- REG_S fp, sp, 8 +- +- or fp, sp, zero +- +- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- /* Update .got.plt and obtain runtime address of callee. */ +- SLLI a1, t1, 1 +- or a0, t0, zero +- ADD a1, a1, t1 +- or a2, ra, zero /* return addr */ +- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ +- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- +- la t2, _dl_profile_fixup +- jirl ra, t2, 0 +- +- REG_L t3, fp, OFFSET_FS +- bge t3, zero, 1f +- +- /* Save the return. */ +- or t4, v0, zero +- +- /* Restore arguments from stack. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- REG_L ra, fp, 0 +- REG_L fp, fp, SZREG +- +- ADDI sp, sp, SF_SIZE +- jirl zero, t4, 0 +- +-1: +- /* The new frame size is in t3. */ +- SUB sp, fp, t3 +- BSTRINS sp, zero, 3, 0 +- +- REG_S a0, fp, OFFSET_T1 +- +- or a0, sp, zero +- ADDI a1, fp, SF_SIZE +- or a2, t3, zero +- la t5, memcpy +- jirl ra, t5, 0 +- +- REG_L t6, fp, OFFSET_T1 +- +- /* Call the function. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- jirl ra, t6, 0 +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 +- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* Setup call to pltexit. */ +- REG_L a0, fp, OFFSET_SAVED_CALL_A0 +- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- ADDI a2, fp, OFFSET_RG +- ADDI a3, fp, OFFSET_RV +- la t7, _dl_audit_pltexit +- jirl ra, t7, 0 +- +- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 +- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 +- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* RA from within La_loongarch_reg. */ +- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA +- or sp, fp, zero +- ADDI sp, sp, SF_SIZE +- REG_S fp, fp, SZREG +- +- jirl zero, ra, 0 +- +-END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h +index 99fcacab..e298439d 100644 +--- a/sysdeps/loongarch/dl-trampoline.h ++++ b/sysdeps/loongarch/dl-trampoline.h +@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) + /* Invoke the callee. */ + jirl zero, t1, 0 + END (_dl_runtime_resolve) ++ ++#include "dl-link.h" ++ ++ENTRY (_dl_runtime_profile) ++ /* LoongArch we get called with: ++ t0 linkr_map pointer ++ t1 the scaled offset stored in t0, which can be used ++ to calculate the offset of the current symbol in .rela.plt ++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function ++ t3 dl resolver entry point, no use in this function ++ ++ Stack frame layout: ++ [sp, #208] La_loongarch_regs ++ [sp, #128] La_loongarch_retval // align: 16 ++ [sp, #112] frame size return from pltenter ++ [sp, #80 ] dl_profile_call saved vec1 ++ [sp, #48 ] dl_profile_call saved vec0 // align: 16 ++ [sp, #32 ] dl_profile_call saved a1 ++ [sp, #24 ] dl_profile_call saved a0 ++ [sp, #16 ] T1 ++ [sp, #0 ] ra, fp <- fp ++ */ ++ ++# define OFFSET_T1 16 ++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 ++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 ++# define OFFSET_RV OFFSET_FS + 8 + 8 ++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV ++ ++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) ++ ++ /* Save arguments to stack. */ ++ ADDI sp, sp, -SF_SIZE ++ REG_S ra, sp, 0 ++ REG_S fp, sp, 8 ++ ++ or fp, sp, zero ++ ++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ /* Update .got.plt and obtain runtime address of callee. */ ++ SLLI a1, t1, 1 ++ or a0, t0, zero ++ ADD a1, a1, t1 ++ or a2, ra, zero /* return addr */ ++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ ++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ++ la t2, _dl_profile_fixup ++ jirl ra, t2, 0 ++ ++ REG_L t3, fp, OFFSET_FS ++ bge t3, zero, 1f ++ ++ /* Save the return. */ ++ or t4, v0, zero ++ ++ /* Restore arguments from stack. */ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ REG_L ra, fp, 0 ++ REG_L fp, fp, SZREG ++ ++ ADDI sp, sp, SF_SIZE ++ jirl zero, t4, 0 ++ ++1: ++ /* The new frame size is in t3. */ ++ SUB sp, fp, t3 ++ BSTRINS sp, zero, 3, 0 ++ ++ REG_S a0, fp, OFFSET_T1 ++ ++ or a0, sp, zero ++ ADDI a1, fp, SF_SIZE ++ or a2, t3, zero ++ la t5, memcpy ++ jirl ra, t5, 0 ++ ++ REG_L t6, fp, OFFSET_T1 ++ ++ /* Call the function. */ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ jirl ra, t6, 0 ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* Setup call to pltexit. */ ++ REG_L a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ADDI a2, fp, OFFSET_RG ++ ADDI a3, fp, OFFSET_RV ++ la t7, _dl_audit_pltexit ++ jirl ra, t7, 0 ++ ++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 ++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* RA from within La_loongarch_reg. */ ++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA ++ or sp, fp, zero ++ ADDI sp, sp, SF_SIZE ++ REG_S fp, fp, SZREG ++ ++ jirl zero, ra, 0 ++ ++END (_dl_runtime_profile) +-- +2.33.0 + |