Diffstat (limited to 'LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch')
-rw-r--r-- | LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch | 448
1 file changed, 448 insertions, 0 deletions
diff --git a/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch b/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
new file mode 100644
index 0000000..1ac8637
--- /dev/null
+++ b/LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
@@ -0,0 +1,448 @@
+From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001
+From: dengjianbo <dengjianbo@loongson.cn>
+Date: Mon, 28 Aug 2023 10:08:35 +0800
+Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned,
+ lsx, lasx}
+
+According to glibc rawmemchr microbenchmark, A few cases tested with
+char '\0' experience performance degradation due to the lasx and lsx
+versions don't handle the '\0' separately. Overall, rawmemchr-lasx
+implementation could reduce the runtime about 40%-80%, rawmemchr-lsx
+implementation could reduce the runtime about 40%-66%, rawmemchr-aligned
+implementation could reduce the runtime about 20%-40%.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ sysdeps/loongarch/lp64/multiarch/Makefile     |   3 +
+ .../lp64/multiarch/ifunc-impl-list.c          |   8 ++
+ .../lp64/multiarch/ifunc-rawmemchr.h          |  40 ++++++
+ .../lp64/multiarch/rawmemchr-aligned.S        | 124 ++++++++++++++++++
+ .../loongarch/lp64/multiarch/rawmemchr-lasx.S |  82 ++++++++++++
+ .../loongarch/lp64/multiarch/rawmemchr-lsx.S  |  71 ++++++++++
+ sysdeps/loongarch/lp64/multiarch/rawmemchr.c  |  37 ++++++
+ 7 files changed, 365 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c
+
+diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
+index 5d7ae7ae..64416b02 100644
+--- a/sysdeps/loongarch/lp64/multiarch/Makefile
++++ b/sysdeps/loongarch/lp64/multiarch/Makefile
+@@ -21,5 +21,8 @@ sysdep_routines += \
+   memmove-unaligned \
+   memmove-lsx \
+   memmove-lasx \
++  rawmemchr-aligned \
++  rawmemchr-lsx \
++  rawmemchr-lasx \
+ # sysdep_routines
+ endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+index c8ba87bd..3db9af14 100644
+--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+             IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
+             )
+
++  IFUNC_IMPL (i, name, rawmemchr,
++#if !defined __loongarch_soft_float
++            IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
++            IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
++#endif
++            IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
++            )
++
+   return i;
+ }
+diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
+new file mode 100644
+index 00000000..a7bb4cf9
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
+@@ -0,0 +1,40 @@
++/* Common definition for rawmemchr ifunc selections.
++   All versions must be listed in ifunc-impl-list.c.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>. */
++
++#include <ldsodefs.h>
++#include <ifunc-init.h>
++
++#if !defined __loongarch_soft_float
++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
++#endif
++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
++
++static inline void *
++IFUNC_SELECTOR (void)
++{
++#if !defined __loongarch_soft_float
++  if (SUPPORT_LASX)
++    return OPTIMIZE (lasx);
++  else if (SUPPORT_LSX)
++    return OPTIMIZE (lsx);
++  else
++#endif
++  return OPTIMIZE (aligned);
++}
+diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
+new file mode 100644
+index 00000000..9c7155ae
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
+@@ -0,0 +1,124 @@
++/* Optimized rawmemchr implementation using basic LoongArch instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library. If not, see
++   <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++#include <sys/regdef.h>
++#include <sys/asm.h>
++
++#if IS_IN (libc)
++# define RAWMEMCHR_NAME __rawmemchr_aligned
++#else
++# define RAWMEMCHR_NAME __rawmemchr
++#endif
++
++LEAF(RAWMEMCHR_NAME, 6)
++    andi       t1, a0, 0x7
++    bstrins.d  a0, zero, 2, 0
++    lu12i.w    a2, 0x01010
++    bstrins.d  a1, a1, 15, 8
++
++    ld.d       t0, a0, 0
++    slli.d     t1, t1, 3
++    ori        a2, a2, 0x101
++    bstrins.d  a1, a1, 31, 16
++
++    li.w       t8, -1
++    bstrins.d  a1, a1, 63, 32
++    bstrins.d  a2, a2, 63, 32
++    sll.d      t2, t8, t1
++
++    sll.d      t3, a1, t1
++    orn        t0, t0, t2
++    slli.d     a3, a2, 7
++    beqz       a1, L(find_zero)
++
++    xor        t0, t0, t3
++    sub.d      t1, t0, a2
++    andn       t2, a3, t0
++    and        t3, t1, t2
++
++    bnez       t3, L(count_pos)
++    addi.d     a0, a0, 8
++
++L(loop):
++    ld.d       t0, a0, 0
++    xor        t0, t0, a1
++
++    sub.d      t1, t0, a2
++    andn       t2, a3, t0
++    and        t3, t1, t2
++    bnez       t3, L(count_pos)
++
++    ld.d       t0, a0, 8
++    addi.d     a0, a0, 16
++    xor        t0, t0, a1
++    sub.d      t1, t0, a2
++
++    andn       t2, a3, t0
++    and        t3, t1, t2
++    beqz       t3, L(loop)
++    addi.d     a0, a0, -8
++L(count_pos):
++    ctz.d      t0, t3
++    srli.d     t0, t0, 3
++    add.d      a0, a0, t0
++    jr         ra
++
++L(loop_7bit):
++    ld.d       t0, a0, 0
++L(find_zero):
++    sub.d      t1, t0, a2
++    and        t2, t1, a3
++    bnez       t2, L(more_check)
++
++    ld.d       t0, a0, 8
++    addi.d     a0, a0, 16
++    sub.d      t1, t0, a2
++    and        t2, t1, a3
++
++    beqz       t2, L(loop_7bit)
++    addi.d     a0, a0, -8
++
++L(more_check):
++    andn       t2, a3, t0
++    and        t3, t1, t2
++    bnez       t3, L(count_pos)
++    addi.d     a0, a0, 8
++
++L(loop_8bit):
++    ld.d       t0, a0, 0
++
++    sub.d      t1, t0, a2
++    andn       t2, a3, t0
++    and        t3, t1, t2
++    bnez       t3, L(count_pos)
++
++    ld.d       t0, a0, 8
++    addi.d     a0, a0, 16
++    sub.d      t1, t0, a2
++
++    andn       t2, a3, t0
++    and        t3, t1, t2
++    beqz       t3, L(loop_8bit)
++
++    addi.d     a0, a0, -8
++    b          L(count_pos)
++
++END(RAWMEMCHR_NAME)
++
++libc_hidden_builtin_def (__rawmemchr)
+diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
+new file mode 100644
+index 00000000..be2eb59d
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
+@@ -0,0 +1,82 @@
++/* Optimized rawmemchr implementation using LoongArch LASX instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library. If not, see
++   <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++#include <sys/asm.h>
++#include <sys/regdef.h>
++
++#if IS_IN (libc) && !defined __loongarch_soft_float
++
++# define RAWMEMCHR __rawmemchr_lasx
++
++LEAF(RAWMEMCHR, 6)
++    move           a2, a0
++    bstrins.d      a0, zero, 5, 0
++    xvld           xr0, a0, 0
++    xvld           xr1, a0, 32
++
++    xvreplgr2vr.b  xr2, a1
++    xvseq.b        xr0, xr0, xr2
++    xvseq.b        xr1, xr1, xr2
++    xvmsknz.b      xr0, xr0
++
++    xvmsknz.b      xr1, xr1
++    xvpickve.w     xr3, xr0, 4
++    xvpickve.w     xr4, xr1, 4
++    vilvl.h        vr0, vr3, vr0
++
++    vilvl.h        vr1, vr4, vr1
++    vilvl.w        vr0, vr1, vr0
++    movfr2gr.d     t0, fa0
++    sra.d          t0, t0, a2
++
++
++    beqz           t0, L(loop)
++    ctz.d          t0, t0
++    add.d          a0, a2, t0
++    jr             ra
++
++L(loop):
++    xvld           xr0, a0, 64
++    xvld           xr1, a0, 96
++    addi.d         a0, a0, 64
++    xvseq.b        xr0, xr0, xr2
++
++    xvseq.b        xr1, xr1, xr2
++    xvmax.bu       xr3, xr0, xr1
++    xvseteqz.v     fcc0, xr3
++    bcnez          fcc0, L(loop)
++
++    xvmsknz.b      xr0, xr0
++    xvmsknz.b      xr1, xr1
++    xvpickve.w     xr3, xr0, 4
++    xvpickve.w     xr4, xr1, 4
++
++
++    vilvl.h        vr0, vr3, vr0
++    vilvl.h        vr1, vr4, vr1
++    vilvl.w        vr0, vr1, vr0
++    movfr2gr.d     t0, fa0
++
++    ctz.d          t0, t0
++    add.d          a0, a0, t0
++    jr             ra
++END(RAWMEMCHR)
++
++libc_hidden_builtin_def (RAWMEMCHR)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
+new file mode 100644
+index 00000000..2f6fe024
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
+@@ -0,0 +1,71 @@
++/* Optimized rawmemchr implementation using LoongArch LSX instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library. If not, see
++   <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++#include <sys/regdef.h>
++#include <sys/asm.h>
++
++#if IS_IN (libc) && !defined __loongarch_soft_float
++
++# define RAWMEMCHR __rawmemchr_lsx
++
++LEAF(RAWMEMCHR, 6)
++    move           a2, a0
++    bstrins.d      a0, zero, 4, 0
++    vld            vr0, a0, 0
++    vld            vr1, a0, 16
++
++    vreplgr2vr.b   vr2, a1
++    vseq.b         vr0, vr0, vr2
++    vseq.b         vr1, vr1, vr2
++    vmsknz.b       vr0, vr0
++
++    vmsknz.b       vr1, vr1
++    vilvl.h        vr0, vr1, vr0
++    movfr2gr.s     t0, fa0
++    sra.w          t0, t0, a2
++
++    beqz           t0, L(loop)
++    ctz.w          t0, t0
++    add.d          a0, a2, t0
++    jr             ra
++
++
++L(loop):
++    vld            vr0, a0, 32
++    vld            vr1, a0, 48
++    addi.d         a0, a0, 32
++    vseq.b         vr0, vr0, vr2
++
++    vseq.b         vr1, vr1, vr2
++    vmax.bu        vr3, vr0, vr1
++    vseteqz.v      fcc0, vr3
++    bcnez          fcc0, L(loop)
++
++    vmsknz.b       vr0, vr0
++    vmsknz.b       vr1, vr1
++    vilvl.h        vr0, vr1, vr0
++    movfr2gr.s     t0, fa0
++
++    ctz.w          t0, t0
++    add.d          a0, a0, t0
++    jr             ra
++END(RAWMEMCHR)
++
++libc_hidden_builtin_def (RAWMEMCHR)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
+new file mode 100644
+index 00000000..89c7ffff
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
+@@ -0,0 +1,37 @@
++/* Multiple versions of rawmemchr.
++   All versions must be listed in ifunc-impl-list.c.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++# define rawmemchr __redirect_rawmemchr
++# define __rawmemchr __redirect___rawmemchr
++# include <string.h>
++# undef rawmemchr
++# undef __rawmemchr
++
++# define SYMBOL_NAME rawmemchr
++# include "ifunc-rawmemchr.h"
++
++libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
++                       IFUNC_SELECTOR ());
++weak_alias (__rawmemchr, rawmemchr)
++# ifdef SHARED
++__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
++  __attribute__((visibility ("hidden")));
++# endif
++#endif
+--
+2.33.0
+
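All three implementations added by this patch scan one aligned machine word or vector per step and test every byte in parallel for the target character. In rawmemchr-aligned.S this is the classic SWAR zero-byte test, (x - 0x0101...01) & ~x & 0x8080...80 applied to x = word XOR (character replicated into every byte): a 0x80 bit survives only in bytes of x that are zero, i.e. bytes of the word equal to the character. The C sketch below illustrates that technique only and is not the glibc code: the name rawmemchr_swar, the plain-C structure and the GCC/Clang builtin __builtin_ctzll are illustrative assumptions, the separate '\0' fast path the assembly takes is omitted, and, like the assembly, the sketch reads whole aligned words that may begin before the string, which is fine on the hardware but not strictly conforming ISO C.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Illustrative word-at-a-time rawmemchr: the caller guarantees the
   target byte exists, so no length check is needed (rawmemchr
   semantics).  Mirrors the aligned-word assumption of the assembly.  */
static void *
rawmemchr_swar (const void *s, int c)
{
  const uint64_t ones  = 0x0101010101010101ULL;  /* 0x01 in every byte */
  const uint64_t highs = 0x8080808080808080ULL;  /* 0x80 in every byte */
  const uint64_t rep   = ones * (unsigned char) c;  /* c replicated */

  const unsigned char *p = s;
  unsigned int skip = (uintptr_t) p & 7;   /* bytes before s in the word */
  const unsigned char *w = p - skip;       /* aligned-down start */

  uint64_t word, x, found;
  memcpy (&word, w, sizeof word);
  x = word ^ rep;                          /* zero byte <=> match */
  found = (x - ones) & ~x & highs;         /* SWAR zero-byte test */
  found &= ~0ULL << (skip * 8);            /* ignore bytes before s */

  while (found == 0)
    {
      w += 8;
      memcpy (&word, w, sizeof word);
      x = word ^ rep;
      found = (x - ones) & ~x & highs;
    }

  /* Lowest set 0x80 bit marks the first match (little-endian, as on
     LoongArch); trailing zeros / 8 gives its byte index.  */
  return (void *) (w + (__builtin_ctzll (found) >> 3));
}

int
main (void)
{
  const char buf[] = "loongarch rawmemchr";

  printf ("%s\n", (char *) rawmemchr_swar (buf, 'w'));        /* wmemchr */
  printf ("NUL at offset %td\n",
          (char *) rawmemchr_swar (buf, '\0') - buf);         /* 19 */
  return 0;
}

From the caller's side nothing changes: the resolver in ifunc-rawmemchr.h picks __rawmemchr_lasx, __rawmemchr_lsx or __rawmemchr_aligned once, when the dynamic linker processes the ifunc relocation, and ordinary code keeps calling rawmemchr. A hypothetical caller (rawmemchr is a GNU extension, so it needs _GNU_SOURCE):

#define _GNU_SOURCE
#include <string.h>
#include <stdio.h>

int
main (void)
{
  const char *s = "loongarch";
  /* The call site is ordinary; which implementation runs was decided
     at load time, not here.  */
  char *nul = rawmemchr (s, '\0');
  printf ("string length via rawmemchr: %td\n", nul - s);     /* 9 */
  return 0;
}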