diff options
Diffstat (limited to 'LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch')
-rw-r--r-- | LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch | 1099 |
1 files changed, 1099 insertions, 0 deletions
diff --git a/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch b/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch new file mode 100644 index 0000000..2e98b05 --- /dev/null +++ b/LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch @@ -0,0 +1,1099 @@ +From 351086591d938aaf884d475261ae96ec5da00384 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:34:59 +0800 +Subject: [PATCH 22/29] LoongArch: Add ifunc support for strcpy, + stpcpy{aligned, unaligned, lsx, lasx} + +According to glibc strcpy and stpcpy microbenchmark test results(changed +to use generic_strcpy and generic_stpcpy instead of strlen + memcpy), +comparing with the generic version, this implementation could reduce the +runtime as following: + +Name Percent of rutime reduced +strcpy-aligned 8%-45% +strcpy-unaligned 8%-48%, comparing with the aligned version, unaligned + version takes less instructions to copy the tail of data + which length is less than 8. it also has better performance + in case src and dest cannot be both aligned with 8bytes +strcpy-lsx 20%-80% +strcpy-lasx 15%-86% +stpcpy-aligned 6%-43% +stpcpy-unaligned 8%-48% +stpcpy-lsx 10%-80% +stpcpy-lasx 10%-87% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 8 + + .../lp64/multiarch/ifunc-impl-list.c | 18 ++ + .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 +++ + .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 ++ + .../lp64/multiarch/stpcpy-unaligned.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 ++++ + .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 ++++++++++++++++ + .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++++++++++++++ + .../lp64/multiarch/strcpy-unaligned.S | 138 +++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 +++ + 12 files changed, 963 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 360a6718..39550bea 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -16,6 +16,14 @@ sysdep_routines += \ + strcmp-lsx \ + strncmp-aligned \ + strncmp-lsx \ ++ strcpy-aligned \ ++ strcpy-unaligned \ ++ strcpy-lsx \ ++ strcpy-lasx \ ++ stpcpy-aligned \ ++ stpcpy-unaligned \ ++ stpcpy-lsx \ ++ stpcpy-lasx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index e397d58c..39a14f1d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) + ) + ++ IFUNC_IMPL (i, name, strcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, stpcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +new file mode 100644 +index 00000000..1f763db6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +@@ -0,0 +1,27 @@ ++/* stpcpy-aligned implementation is in strcpy-aligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define STPCPY __stpcpy_aligned ++#else ++# define STPCPY __stpcpy ++#endif ++ ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-aligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +new file mode 100644 +index 00000000..13d6c953 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lasx implementation is in strcpy-lasx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lasx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +new file mode 100644 +index 00000000..e0f17ab5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lsx implementation is in strcpy-lsx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lsx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +new file mode 100644 +index 00000000..cc2f9712 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +@@ -0,0 +1,22 @@ ++/* stpcpy-unaligned implementation is in strcpy-unaligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_unaligned ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-unaligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +new file mode 100644 +index 00000000..d4860d7a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +@@ -0,0 +1,42 @@ ++/* Multiple versions of stpcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define stpcpy __redirect_stpcpy ++# define __stpcpy __redirect___stpcpy ++# define NO_MEMPCPY_STPCPY_REDIRECT ++# define __NO_STRING_INLINES ++# include <string.h> ++# undef stpcpy ++# undef __stpcpy ++ ++# define SYMBOL_NAME stpcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); ++ ++weak_alias (__stpcpy, stpcpy) ++# ifdef SHARED ++__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +new file mode 100644 +index 00000000..4ed539fd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +@@ -0,0 +1,202 @@ ++/* Optimized strcpy stpcpy aligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# ifndef STRCPY ++# define STRCPY __strcpy_aligned ++# endif ++#else ++# ifndef STRCPY ++# define STRCPY strcpy ++# endif ++#endif ++ ++LEAF(STRCPY, 6) ++ andi a3, a0, 0x7 ++ move a2, a0 ++ beqz a3, L(dest_align) ++ sub.d a5, a1, a3 ++ addi.d a5, a5, 8 ++ ++L(make_dest_align): ++ ld.b t0, a1, 0 ++ addi.d a1, a1, 1 ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++ beqz t0, L(al_out) ++ ++ bne a1, a5, L(make_dest_align) ++ ++L(dest_align): ++ andi a4, a1, 7 ++ bstrins.d a1, zero, 2, 0 ++ ++ lu12i.w t5, 0x1010 ++ ld.d t0, a1, 0 ++ ori t5, t5, 0x101 ++ bstrins.d t5, t5, 63, 32 ++ ++ slli.d t6, t5, 0x7 ++ bnez a4, L(unalign) ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ bnez t3, L(al_end) ++ ++L(al_loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ ++ addi.d a1, a1, 8 ++ addi.d a2, a2, 8 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ beqz t3, L(al_loop) ++ ++L(al_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++ andi a3, t1, 8 ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++ ++L(al_end_8): ++ beqz a3, L(al_end_4) ++ st.d t0, a2, 0 ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, 7 ++#endif ++ jr ra ++L(al_end_4): ++ beqz a4, L(al_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(al_end_2): ++ beqz a5, L(al_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(al_end_1): ++ beqz a6, L(al_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(al_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++ ++ .align 4 ++L(unalign): ++ slli.d a5, a4, 3 ++ li.d t1, -1 ++ sub.d a6, zero, a5 ++ ++ srl.d a7, t0, a5 ++ sll.d t7, t1, a6 ++ ++ or t0, a7, t7 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(un_end) ++ ++ ld.d t4, a1, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ bnez t3, L(un_end_with_remaining) ++ ++L(un_loop): ++ srl.d a7, t4, a5 ++ ++ ld.d t4, a1, 16 ++ addi.d a1, a1, 8 ++ ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ beqz t3, L(un_loop) ++ ++L(un_end_with_remaining): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ sub.d t1, t1, a4 ++ ++ blt t1, zero, L(un_end_less_8) ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ beqz t1, L(un_out) ++ srl.d t0, t4, a5 ++ b L(un_end_less_8) ++ ++L(un_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++L(un_end_less_8): ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++L(un_end_4): ++ beqz a4, L(un_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(un_end_2): ++ beqz a5, L(un_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(un_end_1): ++ beqz a6, L(un_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(un_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +new file mode 100644 +index 00000000..c2825612 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +@@ -0,0 +1,215 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lasx ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ ori t8, zero, 0xfe0 ++ andi t0, a1, 0xfff ++ li.d t7, -1 ++ move a2, a0 ++ ++ bltu t8, t0, L(page_cross_start) ++L(start_entry): ++ xvld xr0, a1, 0 ++ li.d t0, 32 ++ andi t1, a2, 0x1f ++ ++ xvsetanyeqz.b fcc0, xr0 ++ sub.d t0, t0, t1 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ ++ xvst xr0, a2, 0 ++ andi a3, a1, 0x1f ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ ++ ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ xvst xr0, a2, 0 ++ ++ xvld xr0, a1, 32 ++ addi.d a2, a2, 32 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++ nop ++ ++L(page_cross_start): ++ move a4, a1 ++ bstrins.d a4, zero, 4, 0 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ ++ beq t0, t7, L(start_entry) ++ b L(tail) ++L(unaligned): ++ andi t0, a1, 0xfff ++ bltu t8, t0, L(un_page_cross) ++ ++ ++L(un_start_entry): ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(un_end) ++ addi.d a1, a1, 32 ++ ++L(un_loop): ++ xvst xr0, a2, 0 ++ andi t0, a1, 0xfff ++ addi.d a2, a2, 32 ++ bltu t8, t0, L(page_cross_loop) ++ ++L(un_loop_entry): ++ xvld xr0, a1, 0 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(un_loop) ++ ++ addi.d a1, a1, -32 ++L(un_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++L(un_tail): ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++L(un_page_cross): ++ sub.d a4, a1, a3 ++ ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_start_entry) ++ b L(un_tail) ++ ++ ++L(page_cross_loop): ++ sub.d a4, a1, a3 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_loop_entry) ++ ++ b L(un_tail) ++L(end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++L(tail): ++ cto.w t0, t0 ++ add.d dstend, a2, t0 ++ add.d a5, a1, t0 ++ ++L(less_32): ++ srli.d t1, t0, 4 ++ beqz t1, L(less_16) ++ vld vr0, a1, 0 ++ vld vr1, a5, -15 ++ ++ vst vr0, a2, 0 ++ vst vr1, dstend, -15 ++ jr ra ++L(less_16): ++ srli.d t1, t0, 3 ++ ++ beqz t1, L(less_8) ++ ld.d t2, a1, 0 ++ ld.d t3, a5, -7 ++ st.d t2, a2, 0 ++ ++ st.d t3, dstend, -7 ++ jr ra ++L(less_8): ++ li.d t1, 3 ++ bltu t0, t1, L(less_3) ++ ++ ld.w t2, a1, 0 ++ ld.w t3, a5, -3 ++ st.w t2, a2, 0 ++ st.w t3, dstend, -3 ++ ++ jr ra ++L(less_3): ++ beqz t0, L(zero_byte) ++ ld.h t2, a1, 0 ++ ++ st.h t2, a2, 0 ++L(zero_byte): ++ st.b zero, dstend, 0 ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +new file mode 100644 +index 00000000..fc2498f7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +@@ -0,0 +1,212 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lsx ++# endif ++ ++LEAF(STRCPY, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a4, a1, 0xf ++ vld vr1, t0, %pc_lo12(L(INDEX)) ++ move a2, a0 ++ ++ beqz a4, L(load_start) ++ xor t0, a1, a4 ++ vld vr0, t0, 0 ++ vreplgr2vr.b vr2, a4 ++ ++ vadd.b vr2, vr2, vr1 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(end) ++ ++L(load_start): ++ vld vr0, a1, 0 ++ li.d t1, 16 ++ andi a3, a2, 0xf ++ vsetanyeqz.b fcc0, vr0 ++ ++ ++ sub.d t0, t1, a3 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ vst vr0, a2, 0 ++ ++ andi a3, a1, 0xf ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ vld vr0, a1, 0 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ vst vr0, a2, 0 ++ vld vr0, a1, 16 ++ ++ addi.d a2, a2, 16 ++ addi.d a1, a1, 16 ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(al_loop) ++ ++ ++L(al_end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, -15 ++# ifdef USE_AS_STPCPY ++ add.d a0, a2, t0 ++ vst vr0, a0, -15 ++# else ++ add.d a2, a2, t0 ++ vst vr0, a2, -15 ++# endif ++ jr ra ++ ++L(end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ addi.d t0, t0, 1 ++ ++L(end_16): ++ andi t1, t0, 16 ++ beqz t1, L(end_8) ++ vst vr0, a2, 0 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 15 ++# endif ++ jr ra ++ ++L(end_8): ++ andi t2, t0, 8 ++ andi t3, t0, 4 ++ andi t4, t0, 2 ++ andi t5, t0, 1 ++ ++ beqz t2, L(end_4) ++ vstelm.d vr0, a2, 0, 0 ++ addi.d a2, a2, 8 ++ vbsrl.v vr0, vr0, 8 ++ ++L(end_4): ++ beqz t3, L(end_2) ++ vstelm.w vr0, a2, 0, 0 ++ addi.d a2, a2, 4 ++ vbsrl.v vr0, vr0, 4 ++ ++L(end_2): ++ beqz t4, L(end_1) ++ vstelm.h vr0, a2, 0, 0 ++ addi.d a2, a2, 2 ++ vbsrl.v vr0, vr0, 2 ++ ++ ++L(end_1): ++ beqz t5, L(out) ++ vstelm.b vr0, a2, 0, 0 ++ addi.d a2, a2, 1 ++L(out): ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++# endif ++ jr ra ++ ++ .align 4 ++L(unaligned): ++ bstrins.d a1, zero, 3, 0 ++ vld vr2, a1, 0 ++ vreplgr2vr.b vr3, a3 ++ vslt.b vr4, vr1, vr3 ++ ++ vor.v vr0, vr2, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_first_end) ++ vld vr0, a1, 16 ++ ++ vadd.b vr3, vr3, vr1 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ ++ vor.v vr2, vr0, vr0 ++ addi.d a1, a1, 16 ++L(un_loop): ++ vld vr0, a1, 16 ++ vst vr4, a2, 0 ++ ++ addi.d a2, a2, 16 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ vld vr2, a1, 32 ++ vst vr4, a2, 0 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, 16 ++ ++ vshuf.b vr4, vr2, vr0, vr3 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(un_loop) ++ vor.v vr0, vr2, vr2 ++ ++ ++ addi.d a1, a1, -16 ++L(un_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, 1f ++ vst vr4, a2, 0 ++ ++1: ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, 1 ++ add.d a2, a2, t0 ++ sub.d a2, a2, a3 ++ vst vr0, a2, 1 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 16 ++# endif ++ jr ra ++L(un_first_end): ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ b 1b ++END(STRCPY) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +new file mode 100644 +index 00000000..9e31883b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +@@ -0,0 +1,138 @@ ++/* Optimized strcpy unaligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_unaligned ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ lu12i.w t5, 0x01010 ++ li.w t0, 0xff8 ++ ori t5, t5, 0x101 ++ andi t1, a1, 0xfff ++ ++ bstrins.d t5, t5, 63, 32 ++ move a2, a0 ++ slli.d t6, t5, 7 ++ bltu t0, t1, L(page_cross) ++ ++L(start_entry): ++ ld.d t0, a1, 0 ++ li.d t3, 8 ++ andi a3, a1, 0x7 ++ sub.d t1, t0, t5 ++ ++ andn t2, t6, t0 ++ sub.d t3, t3, a3 ++ and t1, t1, t2 ++ bnez t1, L(end) ++ ++ ++ add.d a1, a1, t3 ++ st.d t0, a2, 0 ++ add.d a2, a2, t3 ++ ld.d t0, a1, 0 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ bnez t1, L(long_end) ++ ++L(loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ addi.d a2, a2, 8 ++ addi.d a1, a1, 8 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(loop) ++ ++ ++L(long_end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a1, a1, t1 ++ ld.d t0, a1, -7 ++ ++ add.d dstend, a2, t1 ++ st.d t0, dstend, -7 ++ jr ra ++ nop ++ ++L(end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a3, a1, t1 ++ add.d dstend, a2, t1 ++ ++L(less_8): ++ li.d t0, 3 ++ bltu t1, t0, L(less_3) ++ ld.w t1, a1, 0 ++ ld.w t2, a3, -3 ++ ++ ++ st.w t1, a2, 0 ++ st.w t2, dstend, -3 ++ jr ra ++L(less_3): ++ beqz t1, L(zero_bytes) ++ ++ ld.h t1, a1, 0 ++ st.h t1, a2, 0 ++L(zero_bytes): ++ st.b zero, dstend, 0 ++ jr ra ++ ++L(page_cross): ++ move a4, a1 ++ bstrins.d a4, zero, 2, 0 ++ ld.d t0, a4, 0 ++ li.d t3, -1 ++ ++ slli.d t4, a1, 3 ++ srl.d t3, t3, t4 ++ srl.d t0, t0, t4 ++ orn t0, t0, t3 ++ ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(start_entry) ++ ++ b L(end) ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c +new file mode 100644 +index 00000000..46afd068 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcpy __redirect_strcpy ++# include <string.h> ++# undef strcpy ++ ++# define SYMBOL_NAME strcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); ++# endif ++#endif +-- +2.33.0 + |