author    | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000
committer | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000
commit    | db43dfdfa8bc2b938582aef3d87e43594c13ee50 (patch)
tree      | 47b95b2f6ac8d8b7e6fa373a5bd7d661bf7234df /LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
parent    | b933872de72b006230559f77acc3ccfb38a1f343 (diff)
automatic import of glibc (openeuler20.03)
Diffstat (limited to 'LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch')
-rw-r--r-- | LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch | 485
1 files changed, 485 insertions, 0 deletions
diff --git a/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch b/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
new file mode 100644
index 0000000..ad7e613
--- /dev/null
+++ b/LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
@@ -0,0 +1,485 @@
+From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001
+From: dengjianbo <dengjianbo@loongson.cn>
+Date: Mon, 28 Aug 2023 10:08:36 +0800
+Subject: [PATCH 15/29] LoongArch: Add ifunc support for memchr{aligned, lsx,
+ lasx}
+
+According to glibc memchr microbenchmark, this implementation could reduce
+the runtime as following:
+
+Name                Percent of runtime reduced
+memchr-lasx         37%-83%
+memchr-lsx          30%-66%
+memchr-aligned      0%-15%
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ sysdeps/loongarch/lp64/multiarch/Makefile     |   3 +
+ .../lp64/multiarch/ifunc-impl-list.c          |   7 ++
+ .../loongarch/lp64/multiarch/ifunc-memchr.h   |  40 ++++++
+ .../loongarch/lp64/multiarch/memchr-aligned.S |  95 ++++++++++++++
+ .../loongarch/lp64/multiarch/memchr-lasx.S    | 117 ++++++++++++++++++
+ sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++
+ sysdeps/loongarch/lp64/multiarch/memchr.c     |  37 ++++++
+ 7 files changed, 401 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
+ create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c
+
+diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
+index 64416b02..2f4802cf 100644
+--- a/sysdeps/loongarch/lp64/multiarch/Makefile
++++ b/sysdeps/loongarch/lp64/multiarch/Makefile
+@@ -24,5 +24,8 @@ sysdep_routines += \
+   rawmemchr-aligned \
+   rawmemchr-lsx \
+   rawmemchr-lasx \
++  memchr-aligned \
++  memchr-lsx \
++  memchr-lasx \
+ # sysdep_routines
+ endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+index 3db9af14..a567b9cf 100644
+--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+             IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
+             )
+
++  IFUNC_IMPL (i, name, memchr,
++#if !defined __loongarch_soft_float
++            IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
++            IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
++#endif
++            IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
++            )
+   return i;
+ }
+diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
+new file mode 100644
+index 00000000..9060ccd5
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
+@@ -0,0 +1,40 @@
++/* Common definition for memchr ifunc selections.
++   All versions must be listed in ifunc-impl-list.c.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <ldsodefs.h>
++#include <ifunc-init.h>
++
++#if !defined __loongarch_soft_float
++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
++#endif
++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
++
++static inline void *
++IFUNC_SELECTOR (void)
++{
++#if !defined __loongarch_soft_float
++  if (SUPPORT_LASX)
++    return OPTIMIZE (lasx);
++  else if (SUPPORT_LSX)
++    return OPTIMIZE (lsx);
++  else
++#endif
++    return OPTIMIZE (aligned);
++}
+diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
+new file mode 100644
+index 00000000..81d0d004
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
+@@ -0,0 +1,95 @@
++/* Optimized memchr implementation using basic LoongArch instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library.  If not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <sys/regdef.h>
++#include <sys/asm.h>
++
++#if IS_IN (libc)
++# define MEMCHR_NAME __memchr_aligned
++#else
++# define MEMCHR_NAME memchr
++#endif
++
++LEAF(MEMCHR_NAME, 6)
++    beqz        a2, L(out)
++    andi        t1, a0, 0x7
++    add.d       a5, a0, a2
++    bstrins.d   a0, zero, 2, 0
++
++    ld.d        t0, a0, 0
++    bstrins.d   a1, a1, 15, 8
++    lu12i.w     a3, 0x01010
++    slli.d      t2, t1, 03
++
++    bstrins.d   a1, a1, 31, 16
++    ori         a3, a3, 0x101
++    li.d        t7, -1
++    li.d        t8, 8
++
++    bstrins.d   a1, a1, 63, 32
++    bstrins.d   a3, a3, 63, 32
++    sll.d       t2, t7, t2
++    xor         t0, t0, a1
++
++
++    addi.d      a6, a5, -1
++    slli.d      a4, a3, 7
++    sub.d       t1, t8, t1
++    orn         t0, t0, t2
++
++    sub.d       t2, t0, a3
++    andn        t3, a4, t0
++    bstrins.d   a6, zero, 2, 0
++    and         t0, t2, t3
++
++    bgeu        t1, a2, L(end)
++L(loop):
++    bnez        t0, L(found)
++    ld.d        t1, a0, 8
++    xor         t0, t1, a1
++
++    addi.d      a0, a0, 8
++    sub.d       t2, t0, a3
++    andn        t3, a4, t0
++    and         t0, t2, t3
++
++
++    bne         a0, a6, L(loop)
++L(end):
++    sub.d       t1, a5, a6
++    ctz.d       t0, t0
++    srli.d      t0, t0, 3
++
++    sltu        t1, t0, t1
++    add.d       a0, a0, t0
++    maskeqz     a0, a0, t1
++    jr          ra
++
++L(found):
++    ctz.d       t0, t0
++    srli.d      t0, t0, 3
++    add.d       a0, a0, t0
++    jr          ra
++
++L(out):
++    move        a0, zero
++    jr          ra
++END(MEMCHR_NAME)
++
++libc_hidden_builtin_def (MEMCHR_NAME)
+diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
+new file mode 100644
+index 00000000..a26cdf48
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
+@@ -0,0 +1,117 @@
++/* Optimized memchr implementation using LoongArch LASX instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library.  If not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <sys/regdef.h>
++#include <sys/asm.h>
++
++#if IS_IN (libc) && !defined __loongarch_soft_float
++
++# define MEMCHR __memchr_lasx
++
++LEAF(MEMCHR, 6)
++    beqz            a2, L(ret0)
++    add.d           a3, a0, a2
++    andi            t0, a0, 0x3f
++    bstrins.d       a0, zero, 5, 0
++
++    xvld            xr0, a0, 0
++    xvld            xr1, a0, 32
++    li.d            t1, -1
++    li.d            t2, 64
++
++    xvreplgr2vr.b   xr2, a1
++    sll.d           t3, t1, t0
++    sub.d           t2, t2, t0
++    xvseq.b         xr0, xr0, xr2
++
++    xvseq.b         xr1, xr1, xr2
++    xvmsknz.b       xr0, xr0
++    xvmsknz.b       xr1, xr1
++    xvpickve.w      xr3, xr0, 4
++
++
++    xvpickve.w      xr4, xr1, 4
++    vilvl.h         vr0, vr3, vr0
++    vilvl.h         vr1, vr4, vr1
++    vilvl.w         vr0, vr1, vr0
++
++    movfr2gr.d      t0, fa0
++    and             t0, t0, t3
++    bgeu            t2, a2, L(end)
++    bnez            t0, L(found)
++
++    addi.d          a4, a3, -1
++    bstrins.d       a4, zero, 5, 0
++L(loop):
++    xvld            xr0, a0, 64
++    xvld            xr1, a0, 96
++
++    addi.d          a0, a0, 64
++    xvseq.b         xr0, xr0, xr2
++    xvseq.b         xr1, xr1, xr2
++    beq             a0, a4, L(out)
++
++
++    xvmax.bu        xr3, xr0, xr1
++    xvseteqz.v      fcc0, xr3
++    bcnez           fcc0, L(loop)
++    xvmsknz.b       xr0, xr0
++
++    xvmsknz.b       xr1, xr1
++    xvpickve.w      xr3, xr0, 4
++    xvpickve.w      xr4, xr1, 4
++    vilvl.h         vr0, vr3, vr0
++
++    vilvl.h         vr1, vr4, vr1
++    vilvl.w         vr0, vr1, vr0
++    movfr2gr.d      t0, fa0
++L(found):
++    ctz.d           t1, t0
++
++    add.d           a0, a0, t1
++    jr              ra
++L(ret0):
++    move            a0, zero
++    jr              ra
++
++
++L(out):
++    xvmsknz.b       xr0, xr0
++    xvmsknz.b       xr1, xr1
++    xvpickve.w      xr3, xr0, 4
++    xvpickve.w      xr4, xr1, 4
++
++    vilvl.h         vr0, vr3, vr0
++    vilvl.h         vr1, vr4, vr1
++    vilvl.w         vr0, vr1, vr0
++    movfr2gr.d      t0, fa0
++
++L(end):
++    sub.d           t2, zero, a3
++    srl.d           t1, t1, t2
++    and             t0, t0, t1
++    ctz.d           t1, t0
++
++    add.d           a0, a0, t1
++    maskeqz         a0, a0, t0
++    jr              ra
++END(MEMCHR)
++
++libc_hidden_builtin_def (MEMCHR)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
+new file mode 100644
+index 00000000..a73ecd25
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
+@@ -0,0 +1,102 @@
++/* Optimized memchr implementation using LoongArch LSX instructions.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library.  If not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <sys/regdef.h>
++#include <sys/asm.h>
++
++#if IS_IN (libc) && !defined __loongarch_soft_float
++
++# define MEMCHR __memchr_lsx
++
++LEAF(MEMCHR, 6)
++    beqz            a2, L(ret0)
++    add.d           a3, a0, a2
++    andi            t0, a0, 0x1f
++    bstrins.d       a0, zero, 4, 0
++
++    vld             vr0, a0, 0
++    vld             vr1, a0, 16
++    li.d            t1, -1
++    li.d            t2, 32
++
++    vreplgr2vr.b    vr2, a1
++    sll.d           t3, t1, t0
++    sub.d           t2, t2, t0
++    vseq.b          vr0, vr0, vr2
++
++    vseq.b          vr1, vr1, vr2
++    vmsknz.b        vr0, vr0
++    vmsknz.b        vr1, vr1
++    vilvl.h         vr0, vr1, vr0
++
++
++    movfr2gr.s      t0, fa0
++    and             t0, t0, t3
++    bgeu            t2, a2, L(end)
++    bnez            t0, L(found)
++
++    addi.d          a4, a3, -1
++    bstrins.d       a4, zero, 4, 0
++L(loop):
++    vld             vr0, a0, 32
++    vld             vr1, a0, 48
++
++    addi.d          a0, a0, 32
++    vseq.b          vr0, vr0, vr2
++    vseq.b          vr1, vr1, vr2
++    beq             a0, a4, L(out)
++
++    vmax.bu         vr3, vr0, vr1
++    vseteqz.v       fcc0, vr3
++    bcnez           fcc0, L(loop)
++    vmsknz.b        vr0, vr0
++
++
++    vmsknz.b        vr1, vr1
++    vilvl.h         vr0, vr1, vr0
++    movfr2gr.s      t0, fa0
++L(found):
++    ctz.w           t0, t0
++
++    add.d           a0, a0, t0
++    jr              ra
++L(ret0):
++    move            a0, zero
++    jr              ra
++
++L(out):
++    vmsknz.b        vr0, vr0
++    vmsknz.b        vr1, vr1
++    vilvl.h         vr0, vr1, vr0
++    movfr2gr.s      t0, fa0
++
++L(end):
++    sub.d           t2, zero, a3
++    srl.w           t1, t1, t2
++    and             t0, t0, t1
++    ctz.w           t1, t0
++
++
++    add.d           a0, a0, t1
++    maskeqz         a0, a0, t0
++    jr              ra
++END(MEMCHR)
++
++libc_hidden_builtin_def (MEMCHR)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c
+new file mode 100644
+index 00000000..059479c0
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memchr.c
+@@ -0,0 +1,37 @@
++/* Multiple versions of memchr.
++   All versions must be listed in ifunc-impl-list.c.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++/* Define multiple versions only for the definition in libc.  */
++#if IS_IN (libc)
++# define memchr __redirect_memchr
++# include <string.h>
++# undef memchr
++
++# define SYMBOL_NAME memchr
++# include "ifunc-memchr.h"
++
++libc_ifunc_redirected (__redirect_memchr, memchr,
++                       IFUNC_SELECTOR ());
++
++# ifdef SHARED
++__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
++  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
++# endif
++
++#endif
+--
+2.33.0
+
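The selection pattern above (ifunc-memchr.h plus the libc_ifunc_redirected call in memchr.c) is glibc's standard GNU indirect-function (ifunc) machinery: a resolver runs once, when the symbol is relocated, and binds memchr to the best implementation for the running CPU. The sketch below is a minimal, self-contained illustration of that mechanism outside glibc; it is not part of the patch. The names my_memchr, memchr_simd, memchr_generic and resolve_my_memchr are hypothetical, and the HWCAP_LOONGARCH_* fallback values are assumptions taken from the Linux uapi convention, since the patch itself uses glibc's internal SUPPORT_LSX/SUPPORT_LASX macros instead.

```c
/* Illustration only -- not part of the patch above.  A minimal sketch of
   a GNU ifunc resolver under the assumptions stated in the text: all
   symbol names are hypothetical, and the HWCAP fallback values are
   assumed from the Linux LoongArch uapi headers.  */
#include <stddef.h>
#include <string.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_LSX
# define HWCAP_LOONGARCH_LSX  (1 << 4)  /* assumed fallback value */
#endif
#ifndef HWCAP_LOONGARCH_LASX
# define HWCAP_LOONGARCH_LASX (1 << 5)  /* assumed fallback value */
#endif

/* Stand-ins for the real __memchr_aligned / __memchr_lsx / __memchr_lasx
   assembly routines added by the patch.  */
static void *
memchr_generic (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

static void *
memchr_simd (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

/* Resolver: runs once, when the dynamic linker processes the IRELATIVE
   relocation for my_memchr, and returns the implementation to bind.
   (glibc's own resolvers avoid external calls such as getauxval; it is
   used here only to keep the example short.)  */
static void *(*
resolve_my_memchr (void)) (const void *, int, size_t)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  if (hwcap & (HWCAP_LOONGARCH_LASX | HWCAP_LOONGARCH_LSX))
    return memchr_simd;
  return memchr_generic;
}

/* Callers use my_memchr like a normal function; the indirection is
   resolved once at load time, not on every call.  */
void *my_memchr (const void *s, int c, size_t n)
  __attribute__ ((ifunc ("resolve_my_memchr")));
```

Inside glibc the resolver reads hardware capabilities already cached by the dynamic loader rather than calling getauxval, but the selection logic has the same shape as IFUNC_SELECTOR in ifunc-memchr.h: prefer LASX, then LSX, then the plain aligned version, so memchr callers pay no per-call dispatch cost.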