summaryrefslogtreecommitdiff
path: root/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
diff options
context:
space:
mode:
Diffstat (limited to '2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch')
-rw-r--r--2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch250
1 files changed, 250 insertions, 0 deletions
diff --git a/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch b/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
new file mode 100644
index 0000000..af11456
--- /dev/null
+++ b/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
@@ -0,0 +1,250 @@
+From dd99689b821162293506e0344f163b82349a9298 Mon Sep 17 00:00:00 2001
+From: Xue Liu <liuxue@loongson.cn>
+Date: Sun, 29 Jan 2023 10:22:01 +0800
+Subject: [PATCH 2/6] LoongArch: Optimize string functions strchr, strchrnul.
+
+Change-Id: I8b274972642b6a1926d8fc176404bfd83344bc51
+---
+ sysdeps/loongarch/lp64/strchr.S | 107 +++++++++++++++++++++++++++
+ sysdeps/loongarch/lp64/strchrnul.S | 115 +++++++++++++++++++++++++++++
+ 2 files changed, 222 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/strchr.S
+ create mode 100644 sysdeps/loongarch/lp64/strchrnul.S
+
+diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
+new file mode 100644
+index 00000000..3d64c684
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strchr.S
+@@ -0,0 +1,107 @@
++/* Optimized strchr implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++
++/* char * strchr (const char *s1, int c);  In: a0 = s1, a1 = c.  Out: v0.  */
++LEAF(strchr) /* scans 8 bytes per step using word-wide zero-byte tests */
++ .align 6 /* NOTE(review): issued after the LEAF label; confirm LEAF already aligns the entry point */
++
++ li.w t4, 0x7 /* t4 = 7, the low-bit mask for 8-byte alignment */
++ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
++ bstrins.d a1, a1, 15, 8 /* copy the low byte of c into bits 15:8 */
++ andi t0, a0, 0x7 /* t0 = byte offset of s1 inside its 8-byte word */
++
++ ori a2, a2, 0x101 /* a2 = 0x01010101 */
++ andn t4, a0, t4 /* t4 = s1 rounded down to an 8-byte boundary */
++ slli.w t1, t0, 3 /* t1 = that offset expressed in bits */
++
++ ld.d t4, t4, 0 /* aligned load; may include bytes that precede s1 */
++
++ nor t8, zero, zero /* t8 = all ones */
++ bstrins.d a1, a1, 31, 16 /* c byte now fills bits 31:0 */
++ srl.d t4, t4, t1 /* drop the bytes before s1; s1[0] is now the low byte */
++
++ bstrins.d a1, a1, 63, 32 /* a1 = c byte replicated in all 8 lanes */
++ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
++ srl.d a7, t8, t1 /* a7 = 0xff in lanes that still hold string bytes */
++
++ li.w t1, 8
++ nor t8, a7, zero /* t8 = 0xff in the high lanes vacated by the shift */
++ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
++ or t5, t8, t4 /* t5 = data with vacated lanes forced to 0xff, so they never look like NUL */
++ and t3, a7, a1 /* t3 = c pattern in valid lanes, 0 in vacated lanes */
++
++ sub.w t1, t1, t0 /* t1 = 8 - offset = bytes of the string covered by the first word */
++ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
++ xor t2, t5, t3 /* t2 has a zero byte wherever data == c; vacated lanes stay 0xff */
++ sub.d a7, t5, a2 /* zero-byte test on t5: (x - 0x01..01) ... */
++ nor a6, t5, a3 /* ... and ~x & 0x80..80 */
++
++ sub.d a5, t2, a2 /* same zero-byte test applied to t2 */
++ nor a4, t2, a3
++
++ and a6, a7, a6 /* a6 = NUL markers (bit 7 of the lane); the lowest set marker is exact */
++ and a5, a5, a4 /* a5 = markers for bytes equal to c */
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* found something in the first word; a0 is still s1 */
++
++ add.d a0, a0, t1 /* advance a0 to the next 8-byte boundary */
++L(_aloop): /* main loop: two aligned words per iteration */
++ ld.d t4, a0, 0
++
++ xor t2, t4, a1 /* zero byte wherever data == c */
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL markers */
++ and a5, a5, a4 /* a5 = c markers */
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* hit in the word at a0 */
++
++ ld.d t4, a0, 8 /* second word of this iteration */
++ addi.d a0, a0, 16 /* pre-advance for the next iteration */
++ xor t2, t4, a1
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6
++ and a5, a5, a4
++ or a7, a6, a5
++ beqz a7, L(_aloop) /* neither c nor NUL in either word: keep scanning */
++
++ addi.d a0, a0, -8 /* hit was in the second word; point a0 back at it */
++L(_mc8_a): /* here a0 = address matching lane 0; a5 = c markers, a6 = NUL markers */
++
++ ctz.d t0, a5 /* bit index of the first c marker; an empty mask is expected to give 64 */
++ ctz.d t2, a6 /* bit index of the first NUL marker */
++
++ srli.w t0, t0, 3 /* byte index of the first c (8 when absent) */
++ srli.w t2, t2, 3 /* byte index of the first NUL (8 when absent) */
++ sltu t1, t2, t0 /* t1 = 1 when NUL comes before any c */
++ add.d v0, a0, t0 /* candidate result: address of the matching byte */
++ masknez v0, v0, t1 /* result becomes 0 (NULL) when t1 != 0 */
++ jr ra
++END(strchr)
++
++libc_hidden_builtin_def (strchr)
++weak_alias (strchr, index) /* index is provided as a weak alias of strchr */
+diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
+new file mode 100644
+index 00000000..58b8b372
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strchrnul.S
+@@ -0,0 +1,115 @@
++/* Optimized strchrnul implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++
++#define MOVZ(rd,rs,rt) \
++ masknez t6, rs, rt;\
++ maskeqz rd, rd, rt;\
++ or rd, rd, t6 /* net effect: rd = (rt != 0) ? rd : rs; t6 is overwritten as scratch */
++
++/* char *strchrnul(const char *s, int c);  In: a0 = s, a1 = c.  Out: v0.  */
++LEAF(__strchrnul) /* scans 8 bytes per step using word-wide zero-byte tests */
++ .align 6 /* NOTE(review): issued after the LEAF label; confirm LEAF already aligns the entry point */
++
++ li.w t4, 0x7 /* t4 = 7, the low-bit mask for 8-byte alignment */
++ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
++ bstrins.d a1, a1, 15, 8 /* copy the low byte of c into bits 15:8 */
++ andi t0, a0, 0x7 /* t0 = byte offset of s inside its 8-byte word */
++
++ ori a2, a2, 0x101 /* a2 = 0x01010101 */
++ andn t4, a0, t4 /* t4 = s rounded down to an 8-byte boundary */
++ slli.w t1, t0, 3 /* t1 = that offset expressed in bits */
++ ld.d t4, t4, 0 /* aligned load; may include bytes that precede s */
++
++ nor t8, zero, zero /* t8 = all ones */
++ bstrins.d a1, a1, 31, 16 /* c byte now fills bits 31:0 */
++ srl.d t4, t4, t1 /* drop the bytes before s; s[0] is now the low byte */
++
++ preld 0, a0, 32 /* cache preload hint for address s + 32 */
++ bstrins.d a1, a1, 63, 32 /* a1 = c byte replicated in all 8 lanes */
++ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
++ srl.d a7, t8, t1 /* a7 = 0xff in lanes that still hold string bytes */
++
++ nor t8, a7, zero /* t8 = 0xff in the high lanes vacated by the shift */
++ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
++ or t5, t8, t4 /* t5 = data with vacated lanes forced to 0xff, so they never look like NUL */
++ and t3, a7, a1 /* t3 = c pattern in valid lanes, 0 in vacated lanes */
++
++ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
++ xor t2, t5, t3 /* t2 has a zero byte wherever data == c; vacated lanes stay 0xff */
++ sub.d a7, t5, a2 /* zero-byte test on t5: (x - 0x01..01) ... */
++ nor a6, t5, a3 /* ... and ~x & 0x80..80 */
++
++ li.w t1, 8
++ sub.d a5, t2, a2 /* same zero-byte test applied to t2 */
++ nor a4, t2, a3
++
++ and a6, a7, a6 /* a6 = NUL markers (bit 7 of the lane); the lowest set marker is exact */
++ and a5, a5, a4 /* a5 = markers for bytes equal to c */
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* found something in the first word; a0 is still s */
++
++ sub.w t1, t1, t0 /* t1 = 8 - offset = bytes of the string covered by the first word */
++ add.d a0, a0, t1 /* advance a0 to the next 8-byte boundary */
++L(_aloop): /* main loop: two aligned words per iteration */
++ ld.d t4, a0, 0
++
++ xor t2, t4, a1 /* zero byte wherever data == c */
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL markers */
++ and a5, a5, a4 /* a5 = c markers */
++
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* hit in the word at a0 */
++
++ ld.d t4, a0, 8 /* second word of this iteration */
++ addi.d a0, a0, 16 /* pre-advance for the next iteration */
++
++ xor t2, t4, a1
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6
++ and a5, a5, a4
++
++ or a7, a6, a5
++ beqz a7, L(_aloop) /* neither c nor NUL in either word: keep scanning */
++
++ addi.d a0, a0, -8 /* hit was in the second word; point a0 back at it */
++L(_mc8_a): /* here a0 = address matching lane 0; a5 = c markers, a6 = NUL markers */
++ ctz.d t0, a5 /* bit index of the first c marker; an empty mask is expected to give 64 */
++ ctz.d t2, a6 /* bit index of the first NUL marker */
++
++ srli.w t0, t0, 3 /* byte index of the first c (8 when absent) */
++ srli.w t2, t2, 3 /* byte index of the first NUL (8 when absent) */
++ slt t1, t0, t2 /* t1 = 1 when c comes strictly before NUL */
++
++ MOVZ(t0,t2,t1) /* t0 = t1 ? t0 : t2, i.e. the smaller of the two indices */
++
++ add.d v0, a0, t0 /* result: address of the first c or of the terminating NUL */
++ jr ra
++END(__strchrnul)
++
++weak_alias(__strchrnul, strchrnul) /* public name is a weak alias of __strchrnul */
+--
+2.33.0
+