diff options
Diffstat (limited to 'LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch')
-rw-r--r-- | LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch | 465 |
1 files changed, 465 insertions, 0 deletions
diff --git a/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch b/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch new file mode 100644 index 0000000..e6b4d91 --- /dev/null +++ b/LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch @@ -0,0 +1,465 @@ +From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:17 +0800 +Subject: [PATCH 08/29] LoongArch: Add ifunc support for strnlen{aligned, lsx, + lasx} + +Based on the glibc microbenchmark, strnlen-aligned implementation could +reduce the runtime more than 10%, strnlen-lsx implementation could reduce +the runtime about 50%-78%, strnlen-lasx implementation could reduce the +runtime about 50%-88%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++ + .../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++ + 7 files changed, 382 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index afa51041..c4dd3143 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -3,6 +3,9 @@ sysdep_routines += \ + strlen-aligned \ + strlen-lsx \ + strlen-lasx \ ++ strnlen-aligned \ ++ strnlen-lsx \ ++ strnlen-lasx \ + strchr-aligned \ + strchr-lsx \ + strchr-lasx \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 25eb96b0..7cec0b77 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) + ) + ++ IFUNC_IMPL (i, name, strnlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) ++ ) ++ + IFUNC_IMPL (i, name, strchr, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +new file mode 100644 +index 00000000..5cf89810 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +@@ -0,0 +1,41 @@ ++/* Common definition for strnlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +new file mode 100644 +index 00000000..b900430a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -0,0 +1,102 @@ ++/* Optimized strnlen implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRNLEN __strnlen_aligned ++#else ++# define STRNLEN __strnlen ++#endif ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(out) ++ lu12i.w a2, 0x01010 ++ andi t1, a0, 0x7 ++ move t4, a0 ++ ++ bstrins.d a0, zero, 2, 0 ++ ori a2, a2, 0x101 ++ li.w t0, -1 ++ ld.d t2, a0, 0 ++ ++ slli.d t3, t1, 3 ++ bstrins.d a2, a2, 63, 32 ++ li.w t5, 8 ++ slli.d a3, a2, 7 ++ ++ sub.w t1, t5, t1 ++ sll.d t0, t0, t3 ++ orn t2, t2, t0 ++ sub.d t0, t2, a2 ++ ++ ++ andn t3, a3, t2 ++ and t0, t0, t3 ++ bnez t0, L(count_pos) ++ sub.d t5, a1, t1 ++ ++ bgeu t1, a1, L(out) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t2, a0, 0 ++ sub.d t0, t2, a2 ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 9 ++ and t0, t0, t1 ++ or t7, t0, t6 ++ ++ bnez t7, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t0, t2, a2 ++ ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 17 ++ and t0, t0, t1 ++ addi.d t5, t5, -16 ++ ++ or t7, t0, t6 ++ beqz t7, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t1, t0 ++ ++ sub.d a0, a0, t4 ++ srli.d t1, t1, 3 ++ add.d a0, t1, a0 ++ sltu t0, a0, a1 ++ ++ masknez t1, a1, t0 ++ maskeqz a0, a0, t0 ++ or a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ move a0, a1 ++ jr ra ++END(STRNLEN) ++ ++weak_alias (STRNLEN, strnlen) ++libc_hidden_builtin_def (STRNLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +new file mode 100644 +index 00000000..2c03d3d9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -0,0 +1,100 @@ ++/* Optimized strnlen implementation using loongarch LASX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lasx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x3f ++ li.d t3, 65 ++ sub.d a2, a0, t1 ++ ++ xvld xr0, a2, 0 ++ xvld xr1, a2, 32 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 5, 0 ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ beq a0, a4, L(out) ++ ++ xvmin.bu xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bceqz fcc0, L(loop) ++L(out): ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ cto.d t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +new file mode 100644 +index 00000000..b769a895 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -0,0 +1,89 @@ ++/* Optimized strnlen implementation using loongarch LSX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lsx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x1f ++ li.d t3, 33 ++ sub.d a2, a0, t1 ++ ++ vld vr0, a2, 0 ++ vld vr1, a2, 16 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 4, 0 ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ beq a0, a4, L(out) ++ ++ vmin.bu vr2, vr0, vr1 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++L(out): ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_builtin_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c +new file mode 100644 +index 00000000..38b7a25a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strnlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strnlen __redirect_strnlen ++# define __strnlen __redirect___strnlen ++# include <string.h> ++# undef __strnlen ++# undef strnlen ++ ++# define SYMBOL_NAME strnlen ++# include "ifunc-strnlen.h" ++ ++libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); ++weak_alias (__strnlen, strnlen); ++# ifdef SHARED ++__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); ++__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) ++ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); ++# endif ++#endif +-- +2.33.0 + |