diff options
author | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-10-09 03:36:26 +0000 |
commit | db43dfdfa8bc2b938582aef3d87e43594c13ee50 (patch) | |
tree | 47b95b2f6ac8d8b7e6fa373a5bd7d661bf7234df /0008-AArch64-Add-memset_zva64.patch | |
parent | b933872de72b006230559f77acc3ccfb38a1f343 (diff) |
automatic import of glibc openeuler20.03
Diffstat (limited to '0008-AArch64-Add-memset_zva64.patch')
-rw-r--r-- | 0008-AArch64-Add-memset_zva64.patch | 228 |
1 file changed, 228 insertions, 0 deletions
diff --git a/0008-AArch64-Add-memset_zva64.patch b/0008-AArch64-Add-memset_zva64.patch new file mode 100644 index 0000000..5225816 --- /dev/null +++ b/0008-AArch64-Add-memset_zva64.patch @@ -0,0 +1,228 @@ +From 156e44845f4137d6d3ea6c2824dd459652a7efda Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Thu, 26 Oct 2023 17:07:21 +0100 +Subject: [PATCH 08/26] AArch64: Add memset_zva64 + +Add a specialized memset for the common ZVA size of 64 to avoid the +overhead of reading the ZVA size. Since the code is identical to +__memset_falkor, remove the latter. + +Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> +(cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd) +--- + sysdeps/aarch64/memset.S | 10 ++-- + sysdeps/aarch64/multiarch/Makefile | 2 +- + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 +- + sysdeps/aarch64/multiarch/memset.c | 9 ++-- + sysdeps/aarch64/multiarch/memset_falkor.S | 54 --------------------- + sysdeps/aarch64/multiarch/memset_zva64.S | 27 +++++++++++ + 6 files changed, 38 insertions(+), 68 deletions(-) + delete mode 100644 sysdeps/aarch64/multiarch/memset_falkor.S + create mode 100644 sysdeps/aarch64/multiarch/memset_zva64.S + +diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S +index bf3cf85c8a..bbfb7184c3 100644 +--- a/sysdeps/aarch64/memset.S ++++ b/sysdeps/aarch64/memset.S +@@ -101,19 +101,19 @@ L(tail64): + ret + + L(try_zva): +-#ifdef ZVA_MACRO +- zva_macro +-#else ++#ifndef ZVA64_ONLY + .p2align 3 + mrs tmp1, dczid_el0 + tbnz tmp1w, 4, L(no_zva) + and tmp1w, tmp1w, 15 + cmp tmp1w, 4 /* ZVA size is 64 bytes. */ + b.ne L(zva_128) +- ++ nop ++#endif + /* Write the first and last 64 byte aligned block using stp rather + than using DC ZVA. This is faster on some cores. + */ ++ .p2align 4 + L(zva_64): + str q0, [dst, 16] + stp q0, q0, [dst, 32] +@@ -123,7 +123,6 @@ L(zva_64): + sub count, dstend, dst /* Count is now 128 too large. 
*/ + sub count, count, 128+64+64 /* Adjust count and bias for loop. */ + add dst, dst, 128 +- nop + 1: dc zva, dst + add dst, dst, 64 + subs count, count, 64 +@@ -134,6 +133,7 @@ L(zva_64): + stp q0, q0, [dstend, -32] + ret + ++#ifndef ZVA64_ONLY + .p2align 3 + L(zva_128): + cmp tmp1w, 5 /* ZVA size is 128 bytes. */ +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index a1a4de3cd9..171ca5e4cf 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -12,10 +12,10 @@ sysdep_routines += \ + memmove_mops \ + memset_a64fx \ + memset_emag \ +- memset_falkor \ + memset_generic \ + memset_kunpeng \ + memset_mops \ ++ memset_zva64 \ + strlen_asimd \ + strlen_generic \ + # sysdep_routines +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index 3596d3c8d3..fdd9ea9246 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -54,9 +54,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) + IFUNC_IMPL (i, name, memset, +- /* Enable this on non-falkor processors too so that other cores +- can do a comparative analysis with __memset_generic. 
*/ +- IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) ++ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) + #if HAVE_AARCH64_SVE_ASM +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index 9193b197dd..6deb6865e5 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -28,7 +28,7 @@ + + extern __typeof (__redirect_memset) __libc_memset; + +-extern __typeof (__redirect_memset) __memset_falkor attribute_hidden; ++extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden; + extern __typeof (__redirect_memset) __memset_emag attribute_hidden; + extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; + extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; +@@ -47,18 +47,17 @@ select_memset_ifunc (void) + { + if (IS_A64FX (midr) && zva_size == 256) + return __memset_a64fx; +- return __memset_generic; + } + + if (IS_KUNPENG920 (midr)) + return __memset_kunpeng; + +- if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64) +- return __memset_falkor; +- + if (IS_EMAG (midr)) + return __memset_emag; + ++ if (zva_size == 64) ++ return __memset_zva64; ++ + return __memset_generic; + } + +diff --git a/sysdeps/aarch64/multiarch/memset_falkor.S b/sysdeps/aarch64/multiarch/memset_falkor.S +deleted file mode 100644 +index c6946a8072..0000000000 +--- a/sysdeps/aarch64/multiarch/memset_falkor.S ++++ /dev/null +@@ -1,54 +0,0 @@ +-/* Memset for falkor. +- Copyright (C) 2017-2023 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. 
+- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- <https://www.gnu.org/licenses/>. */ +- +-#include <sysdep.h> +-#include <memset-reg.h> +- +-/* Reading dczid_el0 is expensive on falkor so move it into the ifunc +- resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to +- use this function only when ZVA is enabled. */ +- +-#if IS_IN (libc) +-.macro zva_macro +- .p2align 4 +- /* Write the first and last 64 byte aligned block using stp rather +- than using DC ZVA. This is faster on some cores. */ +- str q0, [dst, 16] +- stp q0, q0, [dst, 32] +- bic dst, dst, 63 +- stp q0, q0, [dst, 64] +- stp q0, q0, [dst, 96] +- sub count, dstend, dst /* Count is now 128 too large. */ +- sub count, count, 128+64+64 /* Adjust count and bias for loop. */ +- add dst, dst, 128 +-1: dc zva, dst +- add dst, dst, 64 +- subs count, count, 64 +- b.hi 1b +- stp q0, q0, [dst, 0] +- stp q0, q0, [dst, 32] +- stp q0, q0, [dstend, -64] +- stp q0, q0, [dstend, -32] +- ret +-.endm +- +-# define ZVA_MACRO zva_macro +-# define MEMSET __memset_falkor +-# include <sysdeps/aarch64/memset.S> +-#endif +diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S +new file mode 100644 +index 0000000000..13f45fd3d8 +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memset_zva64.S +@@ -0,0 +1,27 @@ ++/* Optimized memset for zva size = 64. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++ ++#define ZVA64_ONLY 1 ++#define MEMSET __memset_zva64 ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(X) ++ ++#include "../memset.S" +-- +2.33.0 + |