summaryrefslogtreecommitdiff
path: root/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch
diff options
context:
space:
mode:
Diffstat (limited to '6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch')
-rw-r--r--6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch255
1 files changed, 255 insertions, 0 deletions
diff --git a/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch b/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch
new file mode 100644
index 0000000..0c1af11
--- /dev/null
+++ b/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch
@@ -0,0 +1,255 @@
+From 86290dbec63a9688ab0e0085ab8ab686fa256f18 Mon Sep 17 00:00:00 2001
+From: Xue Liu <liuxue@loongson.cn>
+Date: Sun, 29 Jan 2023 10:25:47 +0800
+Subject: [PATCH 6/6] LoongArch: Optimize string functions strlen, strnlen.
+
+Change-Id: I5df3398f9dbd9ea72c3de14e1e5f7793f6dbd794
+---
+ sysdeps/loongarch/lp64/strlen.S | 102 +++++++++++++++++++++++++
+ sysdeps/loongarch/lp64/strnlen.S | 125 +++++++++++++++++++++++++++++++
+ 2 files changed, 227 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/strlen.S
+ create mode 100644 sysdeps/loongarch/lp64/strnlen.S
+
+diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S
+new file mode 100644
+index 00000000..3569598c
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strlen.S
+@@ -0,0 +1,102 @@
++/* Optimized strlen implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++#include <sys/asm.h>
++
++/* size_t strlen (const char *s1); */
++LEAF(strlen) /* Scans 8 bytes per load for the first zero byte; byte count is returned in v0. */
++ .align 5 /* NOTE(review): follows the LEAF line; if LEAF emits the entry label, this padding lands after it - confirm in sys/asm.h. */
++
++ nor t4, zero, zero /* t4 = all ones. */
++ lu12i.w a2, 0x01010 /* a2 = 0x01010000; completed to 0x0101010101010101 below. */
++ andi t5, a0, 0x7 /* t5 = byte offset of s inside its 8-byte word. */
++
++ li.w t7, 0x7
++ slli.d t6, t5, 0x3 /* t6 = that offset expressed in bits. */
++ andn t7, a0, t7 /* t7 = s rounded down to an 8-byte boundary. */
++ ld.d a1, t7, 0 /* Aligned load of the word that contains s[0]. */
++ sub.d t7, zero, t6 /* t7 = -t6; only its low 6 bits act as the shift count below. */
++ sll.d t4, t4, t7 /* t4 = ones in the top t6 bits (unchanged all-ones when t6 == 0). */
++ maskeqz t4, t4, t6 /* Aligned input (t6 == 0): no fill needed, t4 = 0. */
++ srl.d a1, a1, t6 /* Shift out the low-order bytes that precede s[0]. */
++ or a1, a1, t4 /* Fill the vacated top bytes with 0xff so they never test as NUL. */
++
++
++ ori a2, a2, 0x101 /* a2 = 0x01010101. */
++ nor t1, a1, zero /* NOTE(review): t1 is rewritten by the "nor t1, a1, a3" below before any read; looks dead. */
++ li.w a4, 8
++
++ bstrins.d a2, a2, 63, 32 /* Copy low 32 bits into bits 63:32: a2 = 0x0101010101010101. */
++ sub.d a5, a4, t5 /* a5 = 8 - offset = distance from s to the next aligned word. */
++ move t5, a0 /* t5 now keeps the original s for the final subtraction. */
++
++ sub.d t0, a1, a2 /* t0 = word - 0x01..01. */
++ slli.d t4, a2, 7 /* t4 = 0x8080808080808080. */
++ nor a3, zero, t4 /* a3 = ~t4 = 0x7f7f7f7f7f7f7f7f. */
++ nor t1, a1, a3 /* t1 = ~word & 0x80..80. */
++
++ and t0, t0, t1 /* Nonzero iff the word holds a zero byte; lowest set bit marks the first one. */
++ bnez t0, strlen_count1 /* NUL in the first word: a0 still equals t5, so the base length is 0. */
++ add.d a0, a0, a5 /* a0 = first 8-byte-aligned address after s. */
++strlen_loop: /* Fast loop: two words per iteration, cheap test only. */
++ ld.d a1, a0, 0
++ sub.d t0, a1, a2
++ and t1, t0, t4 /* Cheap test: never misses a zero byte but may also fire for bytes with bit 7 set. */
++ bnez t1, strlen_count_pre
++ ld.d a1, a0, 8 /* Reached only after the first word was shown to hold no zero byte. */
++ sub.d t0, a1, a2
++ and t1, t0, t4
++ addi.d a0, a0, 16
++ beqz t1, strlen_loop
++strlen_count: /* Candidate is in the second word of the pair. */
++ addi.d a0, a0, -8 /* Point a0 back at that second word. */
++strlen_count_pre: /* a0 = address of the candidate word, a1 = its data, t0 = a1 - a2. */
++ nor t1, a1, a3
++ and t0, t0, t1 /* Exact test: also require bit 7 clear in the original byte. */
++ beqz t0, strlen_noascii_start /* False alarm from a high-bit byte: continue with the exact loop. */
++strlen_count1: /* t0 = exact zero-byte mask, a0 = address of its word (or s for the first word). */
++ ctz.d t1, t0 /* Bit index of the first marker bit. */
++ sub.d v0, a0, t5 /* Bytes from s to the start of this word. */
++ srli.w t1, t1, 3 /* Bit index -> byte index. */
++ add.d v0, v0, t1
++ jr ra
++strlen_noascii_start:
++ addi.d a0, a0, 8 /* Step past the word that was just cleared. */
++strlen_loop_noascii: /* Exact loop: full zero-byte test on every word, two words per iteration. */
++ ld.d a1, a0, 0
++ sub.d t0, a1, a2
++ nor t1, a1, a3
++ and t0, t0, t1
++ bnez t0, strlen_count1
++ ld.d a1, a0, 8
++ sub.d t0, a1, a2
++ nor t1, a1, a3
++ and t0, t0, t1
++ addi.d a0, a0, 16
++ beqz t0, strlen_loop_noascii
++ addi.d a0, a0, -8 /* NUL is in the second word: point a0 back at it. */
++ ctz.d t1, t0 /* Same length computation as at strlen_count1. */
++ sub.d v0, a0, t5
++ srli.w t1, t1, 3
++ add.d v0, v0, t1
++ jr ra
++END(strlen)
++
++libc_hidden_builtin_def (strlen)
++
+diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S
+new file mode 100644
+index 00000000..8eaa60e2
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strnlen.S
+@@ -0,0 +1,125 @@
++/* Optimized strnlen implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/asm.h>
++
++/* rd <- (rc != 0) ? ra : rb.
++ a5 is written first as scratch, so rd, ra and rc must not be a5. */
++#define CONDITIONSEL(rd, ra, rb, rc)\
++ masknez a5, rb, rc;\
++ maskeqz rd, ra, rc;\
++ or rd, rd, a5
++
++/* Parameters and Results */
++#define srcin a0 /* s1 as passed by the caller. */
++#define limit a1 /* maxlen as passed by the caller. */
++#define len v0 /* Return value. NOTE(review): v0 is defined outside this file; presumably it aliases a0 - confirm. */
++
++/* Internal variable */
++#define data1 t0 /* Bytes 0-7 of the current 16-byte chunk. */
++#define data2 t1 /* Bytes 8-15 of the current 16-byte chunk. */
++#define has_nul1 t2 /* Nonzero iff data1 contains a zero byte. */
++#define has_nul2 t3 /* Nonzero iff data2 contains a zero byte. */
++#define src t4 /* Read pointer; advanced by 16 right after each pair of loads. */
++#define zeroones t5 /* 0x0101010101010101. */
++#define sevenf t6 /* 0x7f7f7f7f7f7f7f7f. */
++#define data2a t7 /* data2 with its leading bytes forced to 0xff (misaligned entry only). */
++#define tmp6 t7 /* NOTE(review): same register as data2a; not referenced in the visible code. */
++#define pos t8 /* Byte index of the first NUL inside its 8-byte word. */
++#define tmp1 a2
++#define tmp2 a3
++#define tmp3 a4
++#define tmp4 a5 /* Same register that CONDITIONSEL uses as scratch. */
++#define tmp5 a6
++#define limit_wd a7 /* Number of 16-byte chunks still allowed, minus one; negative means limit reached. */
++
++/* size_t strnlen (const char *s1, size_t maxlen); */
++LEAF(__strnlen) /* Returns in len the smaller of limit and the offset of the first NUL; scans 16 aligned bytes per iteration. */
++ .align 4 /* NOTE(review): follows the LEAF line; if LEAF emits the entry label, this padding lands after it - confirm in sys/asm.h. */
++ beqz limit, _hit_limit /* limit == 0: return 0 without loading anything. */
++ lu12i.w zeroones, 0x01010
++ lu12i.w sevenf, 0x7f7f7
++ ori zeroones, zeroones, 0x101
++ ori sevenf, sevenf, 0xf7f
++ bstrins.d zeroones, zeroones, 63, 32 /* zeroones = 0x0101010101010101. */
++ bstrins.d sevenf, sevenf, 63, 32 /* sevenf = 0x7f7f7f7f7f7f7f7f. */
++ andi tmp1, srcin, 15 /* tmp1 = srcin mod 16. */
++ sub.d src, srcin, tmp1 /* src = srcin rounded down to a 16-byte boundary. */
++ bnez tmp1, misaligned
++ addi.d limit_wd, limit, -1 /* limit != 0 here, so this cannot wrap. */
++ srli.d limit_wd, limit_wd, 4 /* limit_wd = (limit - 1) / 16. */
++_loop:
++ ld.d data1, src, 0
++ ld.d data2, src, 8
++ addi.d src, src, 16 /* From here on src points just past the chunk held in data1/data2. */
++_realigned:
++ sub.d tmp1, data1, zeroones
++ or tmp2, data1, sevenf
++ sub.d tmp3, data2, zeroones
++ or tmp4, data2, sevenf
++ andn has_nul1, tmp1, tmp2 /* (x - 0x01..01) & ~(x | 0x7f..7f): nonzero iff data1 has a zero byte. */
++ andn has_nul2, tmp3, tmp4 /* Same test for data2. */
++ addi.d limit_wd, limit_wd, -1
++ srli.d tmp1, limit_wd, 63 /* tmp1 = 1 once the chunk counter has gone negative. */
++ or tmp2, has_nul1, has_nul2
++ or tmp3, tmp1, tmp2
++ beqz tmp3, _loop /* Continue while chunks remain and no NUL was seen. */
++ beqz tmp2, _hit_limit /* Stopped by the counter alone: the answer is limit. */
++ sub.d len, src, srcin /* Offset just past the current chunk; srcin is not read after this. */
++ beqz has_nul1, _nul_in_data2
++ move has_nul2, has_nul1 /* NUL is in data1: use its mask ... */
++ addi.d len, len, -8 /* ... and step back over data2. */
++_nul_in_data2:
++ ctz.d pos, has_nul2 /* Lowest set bit marks the first zero byte. */
++ srli.d pos, pos, 3 /* Bit index -> byte index. */
++ addi.d len, len, -8 /* Step back to the start of the word holding the NUL. */
++ add.d len, len, pos
++ sltu tmp1, len, limit
++ CONDITIONSEL(len, len, limit, tmp1) /* len = (len < limit) ? len : limit. */
++ jr ra
++
++misaligned: /* Entry with tmp1 = srcin mod 16, in 1..15; src is the aligned chunk holding srcin. */
++ addi.d limit_wd, limit, -1
++ sub.d tmp4, zero, tmp1 /* tmp4 = -tmp1. */
++ andi tmp3, limit_wd, 15 /* Low 4 bits of limit - 1, kept apart so adding tmp1 cannot overflow. */
++ srli.d limit_wd, limit_wd, 4
++ li.d tmp5, -1
++ ld.d data1, src, 0
++ ld.d data2, src, 8
++ addi.d src, src, 16
++ slli.d tmp4, tmp4, 3 /* tmp4 = -8 * tmp1; only its low 6 bits act as the shift count. */
++ add.d tmp3, tmp3, tmp1
++ srl.d tmp2, tmp5, tmp4 /* tmp2 = ones in the low (tmp1 mod 8) bytes; all ones when tmp1 == 8. */
++ srli.d tmp3, tmp3, 4 /* Carry (0 or 1) out of the low 4 bits of limit - 1 + tmp1. */
++ add.d limit_wd, limit_wd, tmp3 /* limit_wd = (limit - 1 + tmp1) / 16, formed without 64-bit overflow. */
++ or data1, data1, tmp2 /* Offset <= 8: force the bytes before srcin in data1 to 0xff. */
++ or data2a, data2, tmp2 /* Offset > 8: candidate data2 with its leading bytes forced to 0xff. */
++ li.w tmp3, 9
++ sltu tmp1, tmp1, tmp3 /* tmp1 = (offset <= 8). */
++ CONDITIONSEL(data1, data1, tmp5, tmp1) /* Offset > 8: all of data1 precedes srcin, so make it all ones. Clobbers tmp4 (a5). */
++ CONDITIONSEL(data2, data2, data2a, tmp1) /* Offset > 8: use the masked copy of data2. */
++ b _realigned
++
++_hit_limit:
++ move len, limit /* No NUL inside the first limit bytes: return limit. */
++ jr ra
++END(__strnlen)
++
++weak_alias (__strnlen, strnlen)
++libc_hidden_def (strnlen)
++libc_hidden_def (__strnlen)
+--
+2.33.0
+