Diffstat (limited to '0006-AArch64-Cleanup-ifuncs.patch')
-rw-r--r--  0006-AArch64-Cleanup-ifuncs.patch  540
1 file changed, 540 insertions, 0 deletions
diff --git a/0006-AArch64-Cleanup-ifuncs.patch b/0006-AArch64-Cleanup-ifuncs.patch
new file mode 100644
index 0000000..c1ce54b
--- /dev/null
+++ b/0006-AArch64-Cleanup-ifuncs.patch
@@ -0,0 +1,540 @@
+From 25b66e8c4a75b51b0122089cf6b99860fb05470d Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Tue, 24 Oct 2023 13:51:07 +0100
+Subject: [PATCH 06/26] AArch64: Cleanup ifuncs
+
+Cleanup ifuncs. Remove uses of libc_hidden_builtin_def, use ENTRY rather than
+ENTRY_ALIGN, remove unnecessary defines and conditional compilation. Rename
+strlen_mte to strlen_generic. Remove rtld-memset.
+
+Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
+(cherry picked from commit 9fd3409842b3e2d31cff5dbd6f96066c430f0aa2)
+---
+ sysdeps/aarch64/memset.S | 2 +-
+ sysdeps/aarch64/multiarch/Makefile | 2 +-
+ sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 +-
+ sysdeps/aarch64/multiarch/memchr_nosimd.S | 9 ++----
+ sysdeps/aarch64/multiarch/memcpy_a64fx.S | 14 +++-------
+ sysdeps/aarch64/multiarch/memcpy_falkor.S | 6 ++--
+ sysdeps/aarch64/multiarch/memcpy_sve.S | 2 --
+ sysdeps/aarch64/multiarch/memcpy_thunderx.S | 27 ++++--------------
+ sysdeps/aarch64/multiarch/memcpy_thunderx2.S | 28 +++----------------
+ sysdeps/aarch64/multiarch/memset_a64fx.S | 8 ++----
+ sysdeps/aarch64/multiarch/memset_base64.S | 3 +-
+ sysdeps/aarch64/multiarch/memset_emag.S | 8 ++----
+ sysdeps/aarch64/multiarch/memset_generic.S | 8 +++++-
+ sysdeps/aarch64/multiarch/memset_kunpeng.S | 9 ++----
+ sysdeps/aarch64/multiarch/rtld-memset.S | 25 -----------------
+ sysdeps/aarch64/multiarch/strlen.c | 4 +--
+ sysdeps/aarch64/multiarch/strlen_asimd.S | 1 -
+ .../{strlen_mte.S => strlen_generic.S} | 8 +++---
+ 18 files changed, 41 insertions(+), 125 deletions(-)
+ delete mode 100644 sysdeps/aarch64/multiarch/rtld-memset.S
+ rename sysdeps/aarch64/multiarch/{strlen_mte.S => strlen_generic.S} (85%)
+
+diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
+index 50e5da3e7a..bf3cf85c8a 100644
+--- a/sysdeps/aarch64/memset.S
++++ b/sysdeps/aarch64/memset.S
+@@ -29,7 +29,7 @@
+ *
+ */
+
+-ENTRY_ALIGN (MEMSET, 6)
++ENTRY (MEMSET)
+
+ PTR_ARG (0)
+ SIZE_ARG (2)
+diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
+index e6099548b9..a1a4de3cd9 100644
+--- a/sysdeps/aarch64/multiarch/Makefile
++++ b/sysdeps/aarch64/multiarch/Makefile
+@@ -17,6 +17,6 @@ sysdep_routines += \
+ memset_kunpeng \
+ memset_mops \
+ strlen_asimd \
+- strlen_mte \
++ strlen_generic \
+ # sysdep_routines
+ endif
+diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+index da7f115377..836e8317a5 100644
+--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+@@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
+- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
+
+ return 0;
+ }
+diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
+index 57e48375e9..7800751899 100644
+--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
++++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
+@@ -26,10 +26,6 @@
+ * Use base integer registers.
+ */
+
+-#ifndef MEMCHR
+-# define MEMCHR __memchr_nosimd
+-#endif
+-
+ /* Arguments and results. */
+ #define srcin x0
+ #define chrin x1
+@@ -62,7 +58,7 @@
+ #define REP8_7f 0x7f7f7f7f7f7f7f7f
+
+
+-ENTRY_ALIGN (MEMCHR, 6)
++ENTRY (__memchr_nosimd)
+
+ PTR_ARG (0)
+ SIZE_ARG (2)
+@@ -219,5 +215,4 @@ L(none_chr):
+ mov result, 0
+ ret
+
+-END (MEMCHR)
+-libc_hidden_builtin_def (MEMCHR)
++END (__memchr_nosimd)
+diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+index f89b5b670a..baff7e96d0 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
++++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+@@ -39,9 +39,6 @@
+ #define vlen8 x8
+
+ #if HAVE_AARCH64_SVE_ASM
+-# if IS_IN (libc)
+-# define MEMCPY __memcpy_a64fx
+-# define MEMMOVE __memmove_a64fx
+
+ .arch armv8.2-a+sve
+
+@@ -97,7 +94,7 @@
+ #undef BTI_C
+ #define BTI_C
+
+-ENTRY (MEMCPY)
++ENTRY (__memcpy_a64fx)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -234,11 +231,10 @@ L(last_bytes):
+ st1b z3.b, p0, [dstend, -1, mul vl]
+ ret
+
+-END (MEMCPY)
+-libc_hidden_builtin_def (MEMCPY)
++END (__memcpy_a64fx)
+
+
+-ENTRY_ALIGN (MEMMOVE, 4)
++ENTRY_ALIGN (__memmove_a64fx, 4)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -307,7 +303,5 @@ L(full_overlap):
+ mov dst, dstin
+ b L(last_bytes)
+
+-END (MEMMOVE)
+-libc_hidden_builtin_def (MEMMOVE)
+-# endif /* IS_IN (libc) */
++END (__memmove_a64fx)
+ #endif /* HAVE_AARCH64_SVE_ASM */
+diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S
+index ec0e4ade24..67c4ab34eb 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S
++++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S
+@@ -71,7 +71,7 @@
+ The non-temporal stores help optimize cache utilization. */
+
+ #if IS_IN (libc)
+-ENTRY_ALIGN (__memcpy_falkor, 6)
++ENTRY (__memcpy_falkor)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -198,7 +198,6 @@ L(loop64):
+ ret
+
+ END (__memcpy_falkor)
+-libc_hidden_builtin_def (__memcpy_falkor)
+
+
+ /* RATIONALE:
+@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)
+
+ For small and medium cases memcpy is used. */
+
+-ENTRY_ALIGN (__memmove_falkor, 6)
++ENTRY (__memmove_falkor)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -311,5 +310,4 @@ L(move_long):
+ 3: ret
+
+ END (__memmove_falkor)
+-libc_hidden_builtin_def (__memmove_falkor)
+ #endif
+diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
+index d11be6a443..2f14f91366 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
++++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
+@@ -141,7 +141,6 @@ L(copy64_from_end):
+ ret
+
+ END (__memcpy_sve)
+-libc_hidden_builtin_def (__memcpy_sve)
+
+
+ ENTRY (__memmove_sve)
+@@ -208,5 +207,4 @@ L(return):
+ ret
+
+ END (__memmove_sve)
+-libc_hidden_builtin_def (__memmove_sve)
+ #endif
+diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+index 366287587f..14269b1a47 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
++++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+@@ -65,21 +65,7 @@
+ Overlapping large forward memmoves use a loop that copies backwards.
+ */
+
+-#ifndef MEMMOVE
+-# define MEMMOVE memmove
+-#endif
+-#ifndef MEMCPY
+-# define MEMCPY memcpy
+-#endif
+-
+-#if IS_IN (libc)
+-
+-# undef MEMCPY
+-# define MEMCPY __memcpy_thunderx
+-# undef MEMMOVE
+-# define MEMMOVE __memmove_thunderx
+-
+-ENTRY_ALIGN (MEMMOVE, 6)
++ENTRY (__memmove_thunderx)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
+ b.lo L(move_long)
+
+ /* Common case falls through into memcpy. */
+-END (MEMMOVE)
+-libc_hidden_builtin_def (MEMMOVE)
+-ENTRY (MEMCPY)
++END (__memmove_thunderx)
++
++ENTRY (__memcpy_thunderx)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -316,7 +302,4 @@ L(move_long):
+ stp C_l, C_h, [dstin]
+ 3: ret
+
+-END (MEMCPY)
+-libc_hidden_builtin_def (MEMCPY)
+-
+-#endif
++END (__memcpy_thunderx)
+diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+index d3d6f1debc..93993b9e03 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
++++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+@@ -75,27 +75,12 @@
+ #define I_v v16
+ #define J_v v17
+
+-#ifndef MEMMOVE
+-# define MEMMOVE memmove
+-#endif
+-#ifndef MEMCPY
+-# define MEMCPY memcpy
+-#endif
+-
+-#if IS_IN (libc)
+-
+-#undef MEMCPY
+-#define MEMCPY __memcpy_thunderx2
+-#undef MEMMOVE
+-#define MEMMOVE __memmove_thunderx2
+-
+-
+ /* Overlapping large forward memmoves use a loop that copies backwards.
+ Otherwise memcpy is used. Small moves branch to memcopy16 directly.
+ The longer memcpy cases fall through to the memcpy head.
+ */
+
+-ENTRY_ALIGN (MEMMOVE, 6)
++ENTRY (__memmove_thunderx2)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
+ ccmp tmp1, count, 2, hi
+ b.lo L(move_long)
+
+-END (MEMMOVE)
+-libc_hidden_builtin_def (MEMMOVE)
++END (__memmove_thunderx2)
+
+
+ /* Copies are split into 3 main cases: small copies of up to 16 bytes,
+@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)
+
+ #define MEMCPY_PREFETCH_LDR 640
+
+- .p2align 4
+-ENTRY (MEMCPY)
++ENTRY (__memcpy_thunderx2)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+@@ -449,7 +432,7 @@ L(move_long):
+ 3: ret
+
+
+-END (MEMCPY)
++END (__memcpy_thunderx2)
+ .section .rodata
+ .p2align 4
+
+@@ -472,6 +455,3 @@ L(ext_table):
+ .word L(ext_size_13) -.
+ .word L(ext_size_14) -.
+ .word L(ext_size_15) -.
+-
+-libc_hidden_builtin_def (MEMCPY)
+-#endif
+diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
+index d520355143..7176f3d284 100644
+--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
+@@ -33,8 +33,6 @@
+ #define vector_length x9
+
+ #if HAVE_AARCH64_SVE_ASM
+-# if IS_IN (libc)
+-# define MEMSET __memset_a64fx
+
+ .arch armv8.2-a+sve
+
+@@ -49,7 +47,7 @@
+ #undef BTI_C
+ #define BTI_C
+
+-ENTRY (MEMSET)
++ENTRY (__memset_a64fx)
+ PTR_ARG (0)
+ SIZE_ARG (2)
+
+@@ -166,8 +164,6 @@ L(L2):
+ add count, count, CACHE_LINE_SIZE
+ b L(last)
+
+-END (MEMSET)
+-libc_hidden_builtin_def (MEMSET)
++END (__memset_a64fx)
+
+-#endif /* IS_IN (libc) */
+ #endif /* HAVE_AARCH64_SVE_ASM */
+diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
+index 35296a6dec..0e8f709fa5 100644
+--- a/sysdeps/aarch64/multiarch/memset_base64.S
++++ b/sysdeps/aarch64/multiarch/memset_base64.S
+@@ -34,7 +34,7 @@
+ *
+ */
+
+-ENTRY_ALIGN (MEMSET, 6)
++ENTRY (MEMSET)
+
+ PTR_ARG (0)
+ SIZE_ARG (2)
+@@ -183,4 +183,3 @@ L(zva_64):
+ #endif
+
+ END (MEMSET)
+-libc_hidden_builtin_def (MEMSET)
+diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
+index 17d609cead..6fecad4fae 100644
+--- a/sysdeps/aarch64/multiarch/memset_emag.S
++++ b/sysdeps/aarch64/multiarch/memset_emag.S
+@@ -19,8 +19,7 @@
+
+ #include <sysdep.h>
+
+-#if IS_IN (libc)
+-# define MEMSET __memset_emag
++#define MEMSET __memset_emag
+
+ /*
+ * Using DC ZVA to zero memory does not produce better performance if
+@@ -30,7 +29,6 @@
+ * workloads.
+ */
+
+-# define DC_ZVA_THRESHOLD 0
++#define DC_ZVA_THRESHOLD 0
+
+-# include "./memset_base64.S"
+-#endif
++#include "./memset_base64.S"
+diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
+index 9c23e482bf..6c1f0daac8 100644
+--- a/sysdeps/aarch64/multiarch/memset_generic.S
++++ b/sysdeps/aarch64/multiarch/memset_generic.S
+@@ -21,9 +21,15 @@
+
+ #if IS_IN (libc)
+ # define MEMSET __memset_generic
++
++/* Do not hide the generic version of memset, we use it internally. */
++# undef libc_hidden_builtin_def
++# define libc_hidden_builtin_def(name)
++
+ /* Add a hidden definition for use within libc.so. */
+ # ifdef SHARED
+ .globl __GI_memset; __GI_memset = __memset_generic
+ # endif
+-# include <sysdeps/aarch64/memset.S>
+ #endif
++
++#include <../memset.S>
+diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
+index 86c46434fd..4a54373398 100644
+--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
++++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
+@@ -20,16 +20,13 @@
+ #include <sysdep.h>
+ #include <sysdeps/aarch64/memset-reg.h>
+
+-#if IS_IN (libc)
+-# define MEMSET __memset_kunpeng
+-
+ /* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ *
+ */
+
+-ENTRY_ALIGN (MEMSET, 6)
++ENTRY (__memset_kunpeng)
+
+ PTR_ARG (0)
+ SIZE_ARG (2)
+@@ -108,6 +105,4 @@ L(set_long):
+ stp q0, q0, [dstend, -32]
+ ret
+
+-END (MEMSET)
+-libc_hidden_builtin_def (MEMSET)
+-#endif
++END (__memset_kunpeng)
+diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
+deleted file mode 100644
+index 4b035ed8b2..0000000000
+--- a/sysdeps/aarch64/multiarch/rtld-memset.S
++++ /dev/null
+@@ -1,25 +0,0 @@
+-/* Memset for aarch64, for the dynamic linker.
+- Copyright (C) 2017-2023 Free Software Foundation, Inc.
+-
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library. If not, see
+- <https://www.gnu.org/licenses/>. */
+-
+-#include <sysdep.h>
+-
+-#if IS_IN (rtld)
+-# define MEMSET memset
+-# include <sysdeps/aarch64/memset.S>
+-#endif
+diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c
+index bbdd3de8c4..728bd1936a 100644
+--- a/sysdeps/aarch64/multiarch/strlen.c
++++ b/sysdeps/aarch64/multiarch/strlen.c
+@@ -28,10 +28,10 @@
+
+ extern __typeof (__redirect_strlen) __strlen;
+
+-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
++extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
+ extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;
+
+-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
++libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));
+
+ # undef strlen
+ strong_alias (__strlen, strlen);
+diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
+index 490439491d..aee5ef9f78 100644
+--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
++++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
+@@ -203,4 +203,3 @@ L(page_cross):
+ ret
+
+ END (__strlen_asimd)
+-libc_hidden_builtin_def (__strlen_asimd)
+diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S
+similarity index 85%
+rename from sysdeps/aarch64/multiarch/strlen_mte.S
+rename to sysdeps/aarch64/multiarch/strlen_generic.S
+index 1c1220b767..2346296a18 100644
+--- a/sysdeps/aarch64/multiarch/strlen_mte.S
++++ b/sysdeps/aarch64/multiarch/strlen_generic.S
+@@ -17,14 +17,14 @@
+ <https://www.gnu.org/licenses/>. */
+
+ /* The actual strlen code is in ../strlen.S. If we are building libc this file
+- defines __strlen_mte. Otherwise the include of ../strlen.S will define
+- the normal __strlen entry points. */
++ defines __strlen_generic. Otherwise the include of ../strlen.S will define
++ the normal __strlen entry points. */
+
+ #include <sysdep.h>
+
+ #if IS_IN (libc)
+
+-# define STRLEN __strlen_mte
++# define STRLEN __strlen_generic
+
+ /* Do not hide the generic version of strlen, we use it internally. */
+ # undef libc_hidden_builtin_def
+@@ -32,7 +32,7 @@
+
+ # ifdef SHARED
+ /* It doesn't make sense to send libc-internal strlen calls through a PLT. */
+- .globl __GI_strlen; __GI_strlen = __strlen_mte
++ .globl __GI_strlen; __GI_strlen = __strlen_generic
+ # endif
+ #endif
+
+--
+2.33.0
+
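
Background note: the strlen.c hunk in the patch above dispatches through a GNU indirect function (ifunc) — a resolver runs once at load time and picks __strlen_generic or __strlen_asimd depending on whether MTE is enabled. Below is a minimal standalone sketch of that pattern, assuming GCC on an ELF target; the names (my_strlen, have_mte, and the two stub implementations) are illustrative stand-ins, not glibc's internals.

    #include <stddef.h>

    /* Two stand-in implementations; in glibc these are the per-CPU
       assembly routines (__strlen_generic, __strlen_asimd).  */
    static size_t my_strlen_generic (const char *s)
    {
      size_t n = 0;
      while (s[n] != '\0')
        n++;
      return n;
    }

    static size_t my_strlen_asimd (const char *s)
    {
      return my_strlen_generic (s);   /* placeholder body */
    }

    /* Stand-in for the MTE capability check the real resolver uses.  */
    static int have_mte (void)
    {
      return 0;
    }

    /* The resolver runs while the dynamic linker relocates the binary
       and returns the implementation that my_strlen will bind to.  */
    static size_t (*resolve_my_strlen (void)) (const char *)
    {
      return have_mte () ? my_strlen_generic : my_strlen_asimd;
    }

    size_t my_strlen (const char *s)
         __attribute__ ((ifunc ("resolve_my_strlen")));

Compiled with GCC on an ELF system, every call to my_strlen binds to whichever pointer the resolver returned; the real resolver in sysdeps/aarch64/multiarch/strlen.c makes the same choice between __strlen_generic and __strlen_asimd.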