diff options
author | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
commit | db43dfdfa8bc2b938582aef3d87e43594c13ee50 (patch) | |
tree | 47b95b2f6ac8d8b7e6fa373a5bd7d661bf7234df /LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch | |
parent | b933872de72b006230559f77acc3ccfb38a1f343 (diff) |
automatic import of glibcopeneuler20.03
Diffstat (limited to 'LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch')
-rw-r--r-- | LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch | 784 |
1 files changed, 784 insertions, 0 deletions
diff --git a/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch b/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch new file mode 100644 index 0000000..2e18ba2 --- /dev/null +++ b/LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch @@ -0,0 +1,784 @@ +From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:38 +0800 +Subject: [PATCH 17/29] LoongArch: Add ifunc support for memset{aligned, + unaligned, lsx, lasx} + +According to glibc memset microbenchmark test results, for LSX and LASX +versions, a few cases with length less than 8 experience performance +degradation; overall, the LASX version could reduce the runtime about +15% - 75%, LSX version could reduce the runtime about 15%-50%. + +The unaligned version uses unaligned memory access to set data whose +length is less than 64 and make address aligned with 8. For this part, +the performance is better than the aligned version. Comparing with the generic +version, the performance is close when the length is larger than 128. When +the length is 8-128, the unaligned version could reduce the runtime about +30%-70%, the aligned version could reduce the runtime about 20%-50%. 
+ +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 4 + + .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++ + .../lp64/multiarch/ifunc-impl-list.c | 10 + + .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++ + .../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++ + 8 files changed, 688 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 7b87bc90..216886c5 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -30,5 +30,9 @@ sysdep_routines += \ + memrchr-generic \ + memrchr-lsx \ + memrchr-lasx \ ++ memset-aligned \ ++ memset-unaligned \ ++ memset-lsx \ ++ memset-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +new file mode 100644 +index 00000000..e2723873 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -0,0 +1,24 @@ ++/* Symbol redirection for loader/static initialization code. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _DL_IFUNC_GENERIC_H ++#define _DL_IFUNC_GENERIC_H ++ ++asm ("memset = __memset_aligned"); ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 8bd5489e..37f60dde 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) + ) ++ ++ IFUNC_IMPL (i, name, memset, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +new file mode 100644 +index 00000000..1fce95b7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +@@ -0,0 +1,174 @@ ++/* Optimized memset aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMSET_NAME __memset_aligned ++#else ++# define MEMSET_NAME memset ++#endif ++ ++LEAF(MEMSET_NAME, 6) ++ move t0, a0 ++ andi a3, a0, 0x7 ++ li.w t6, 16 ++ beqz a3, L(align) ++ bltu a2, t6, L(short_data) ++ ++L(make_align): ++ li.w t8, 8 ++ sub.d t2, t8, a3 ++ pcaddi t1, 11 ++ slli.d t3, t2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++L(al7): ++ st.b a1, t0, 6 ++L(al6): ++ st.b a1, t0, 5 ++L(al5): ++ st.b a1, t0, 4 ++L(al4): ++ st.b a1, t0, 3 ++L(al3): ++ st.b a1, t0, 2 ++L(al2): ++ st.b a1, t0, 1 ++L(al1): ++ st.b a1, t0, 0 ++L(al0): ++ add.d t0, t0, t2 ++ sub.d a2, a2, t2 ++ ++L(align): ++ bstrins.d a1, a1, 15, 8 ++ bstrins.d a1, a1, 31, 16 ++ bstrins.d a1, a1, 63, 32 ++ bltu a2, t6, L(less_16bytes) ++ ++ andi a4, a2, 0x3f ++ beq a4, a2, L(less_64bytes) ++ ++ sub.d t1, a2, a4 ++ move a2, a4 ++ add.d a5, t0, t1 ++ ++L(loop_64bytes): ++ addi.d t0, t0, 64 ++ st.d a1, t0, -64 ++ st.d a1, t0, -56 ++ st.d a1, t0, -48 ++ st.d a1, t0, -40 ++ ++ st.d a1, t0, -32 ++ st.d a1, t0, -24 ++ st.d a1, t0, -16 ++ st.d a1, t0, -8 ++ bne t0, a5, L(loop_64bytes) ++ ++L(less_64bytes): ++ srai.d a4, a2, 5 ++ beqz a4, L(less_32bytes) ++ addi.d a2, a2, -32 ++ st.d a1, 
t0, 0 ++ ++ st.d a1, t0, 8 ++ st.d a1, t0, 16 ++ st.d a1, t0, 24 ++ addi.d t0, t0, 32 ++ ++L(less_32bytes): ++ bltu a2, t6, L(less_16bytes) ++ addi.d a2, a2, -16 ++ st.d a1, t0, 0 ++ st.d a1, t0, 8 ++ addi.d t0, t0, 16 ++ ++L(less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(less_8bytes) ++ addi.d a2, a2, -8 ++ st.d a1, t0, 0 ++ addi.d t0, t0, 8 ++ ++L(less_8bytes): ++ beqz a2, L(less_1byte) ++ srai.d a4, a2, 2 ++ beqz a4, L(less_4bytes) ++ addi.d a2, a2, -4 ++ st.w a1, t0, 0 ++ addi.d t0, t0, 4 ++ ++L(less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(less_2bytes) ++ addi.d a2, a2, -2 ++ st.h a1, t0, 0 ++ addi.d t0, t0, 2 ++ ++L(less_2bytes): ++ beqz a2, L(less_1byte) ++ st.b a1, t0, 0 ++L(less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 19 ++ slli.d t3, a2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++L(short_15): ++ st.b a1, a0, 14 ++L(short_14): ++ st.b a1, a0, 13 ++L(short_13): ++ st.b a1, a0, 12 ++L(short_12): ++ st.b a1, a0, 11 ++L(short_11): ++ st.b a1, a0, 10 ++L(short_10): ++ st.b a1, a0, 9 ++L(short_9): ++ st.b a1, a0, 8 ++L(short_8): ++ st.b a1, a0, 7 ++L(short_7): ++ st.b a1, a0, 6 ++L(short_6): ++ st.b a1, a0, 5 ++L(short_5): ++ st.b a1, a0, 4 ++L(short_4): ++ st.b a1, a0, 3 ++L(short_3): ++ st.b a1, a0, 2 ++L(short_2): ++ st.b a1, a0, 1 ++L(short_1): ++ st.b a1, a0, 0 ++L(short_0): ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +new file mode 100644 +index 00000000..041abbac +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +@@ -0,0 +1,142 @@ ++/* Optimized memset implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lasx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 32 ++ move a3, a0 ++ xvreplgr2vr.b xr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_32bytes) ++ li.d t3, 128 ++ li.d t2, 64 ++ blt t3, a2, L(long_bytes) ++ ++L(less_128bytes): ++ bgeu t2, a2, L(less_64bytes) ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a4, -32 ++ ++ xvst xr0, a4, -64 ++ jr ra ++L(less_64bytes): ++ xvst xr0, a3, 0 ++ xvst xr0, a4, -32 ++ ++ ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ beqz t0, L(less_16bytes) ++ vst vr0, a3, 0 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vstelm.d vr0, a3, 0, 0 ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ ++ beqz t0, L(less_4bytes) ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a3, 0 ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ xvst xr0, a3, 0 ++ bstrins.d a3, zero, 4, 0 ++ addi.d a3, a3, 32 ++ sub.d a2, a4, a3 ++ ++ andi t0, 
a2, 0xff ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++ ++L(loop_256): ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ xvst xr0, a3, 128 ++ xvst xr0, a3, 160 ++ xvst xr0, a3, 192 ++ xvst xr0, a3, 224 ++ ++ addi.d a3, a3, 256 ++ bne a3, t0, L(loop_256) ++L(long_end): ++ bltu a2, t3, L(end_less_128) ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ ++ addi.d a3, a3, 128 ++L(end_less_128): ++ bltu a2, t2, L(end_less_64) ++ addi.d a2, a2, -64 ++ xvst xr0, a3, 0 ++ ++ xvst xr0, a3, 32 ++ addi.d a3, a3, 64 ++L(end_less_64): ++ bltu a2, t1, L(end_less_32) ++ xvst xr0, a3, 0 ++ ++L(end_less_32): ++ xvst xr0, a4, -32 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +new file mode 100644 +index 00000000..3d3982aa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +@@ -0,0 +1,135 @@ ++/* Optimized memset implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lsx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 16 ++ move a3, a0 ++ vreplgr2vr.b vr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_16bytes) ++ li.d t3, 64 ++ li.d t2, 32 ++ bgeu a2, t3, L(long_bytes) ++ ++L(less_64bytes): ++ bgeu t2, a2, L(less_32bytes) ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ vst vr0, a4, -32 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_32bytes): ++ vst vr0, a3, 0 ++ vst vr0, a4, -16 ++ ++ ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ vstelm.d vr0, a3, 0, 0 ++ ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ jr ra ++ ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ vstelm.b vr0, a3, 0, 0 ++L(less_1bytes): ++ jr ra ++L(long_bytes): ++ vst vr0, a3, 0 ++ ++ bstrins.d a3, zero, 3, 0 ++ addi.d a3, a3, 16 ++ sub.d a2, a4, a3 ++ andi t0, a2, 0x7f ++ ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++L(loop_128): ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ vst vr0, a3, 64 ++ ++ ++ vst vr0, a3, 80 ++ vst vr0, a3, 96 ++ vst vr0, a3, 112 ++ addi.d a3, a3, 128 ++ ++ bne a3, t0, L(loop_128) ++L(long_end): ++ bltu a2, t3, L(end_less_64) ++ addi.d a2, a2, -64 ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ addi.d a3, a3, 64 ++ ++L(end_less_64): ++ bltu a2, t2, L(end_less_32) ++ addi.d a2, a2, -32 ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ ++ addi.d a3, a3, 32 ++L(end_less_32): ++ bltu a2, t1, L(end_less_16) ++ vst vr0, a3, 0 ++ ++L(end_less_16): ++ vst vr0, a4, -16 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git 
a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +new file mode 100644 +index 00000000..f7d32039 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +@@ -0,0 +1,162 @@ ++/* Optimized memset unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMSET_NAME __memset_unaligned ++ ++#define ST_128(n) \ ++ st.d a1, a0, n; \ ++ st.d a1, a0, n+8 ; \ ++ st.d a1, a0, n+16 ; \ ++ st.d a1, a0, n+24 ; \ ++ st.d a1, a0, n+32 ; \ ++ st.d a1, a0, n+40 ; \ ++ st.d a1, a0, n+48 ; \ ++ st.d a1, a0, n+56 ; \ ++ st.d a1, a0, n+64 ; \ ++ st.d a1, a0, n+72 ; \ ++ st.d a1, a0, n+80 ; \ ++ st.d a1, a0, n+88 ; \ ++ st.d a1, a0, n+96 ; \ ++ st.d a1, a0, n+104; \ ++ st.d a1, a0, n+112; \ ++ st.d a1, a0, n+120; ++ ++LEAF(MEMSET_NAME, 6) ++ bstrins.d a1, a1, 15, 8 ++ add.d t7, a0, a2 ++ bstrins.d a1, a1, 31, 16 ++ move t0, a0 ++ ++ bstrins.d a1, a1, 63, 32 ++ srai.d t8, a2, 4 ++ beqz t8, L(less_16bytes) ++ srai.d t8, a2, 6 ++ ++ bnez t8, L(more_64bytes) ++ srai.d t8, a2, 5 ++ beqz t8, L(less_32bytes) ++ ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, a0, 16 ++ st.d a1, a0, 24 ++ ++ st.d a1, t7, -32 ++ st.d a1, t7, -24 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d t8, a2, 3 ++ beqz t8, L(less_8bytes) ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d t8, a2, 2 ++ beqz t8, L(less_4bytes) ++ st.w a1, a0, 0 ++ st.w a1, t7, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d t8, a2, 1 ++ beqz t8, L(less_2bytes) ++ st.h a1, a0, 0 ++ st.h a1, t7, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ addi.d a0, a0, 0x8 ++ st.d a1, t0, 0 ++ ++ sub.d t2, t0, a0 ++ add.d a2, t2, a2 ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ ST_128(0) ++ addi.d a0, a0, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ pcaddi t1, 20 ++ andi t5, a2, 0x78 
++ srli.d t5, t5, 1 ++ ++ sub.d t1, t1, t5 ++ jr t1 ++ ++ st.d a1, a0, 112 ++ st.d a1, a0, 104 ++ st.d a1, a0, 96 ++ st.d a1, a0, 88 ++ st.d a1, a0, 80 ++ st.d a1, a0, 72 ++ st.d a1, a0, 64 ++ st.d a1, a0, 56 ++ st.d a1, a0, 48 ++ st.d a1, a0, 40 ++ st.d a1, a0, 32 ++ st.d a1, a0, 24 ++ st.d a1, a0, 16 ++ st.d a1, a0, 8 ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ move a0, t0 ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c +new file mode 100644 +index 00000000..3ff60d8a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memset. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define memset __redirect_memset ++# include <string.h> ++# undef memset ++ ++# define SYMBOL_NAME memset ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memset, memset, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memset, __GI_memset, __redirect_memset) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); ++# endif ++ ++#endif +-- +2.33.0 + |