diff options
Diffstat (limited to '2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch')
-rw-r--r-- | 2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch b/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
new file mode 100644
index 0000000..af11456
--- /dev/null
+++ b/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
@@ -0,0 +1,250 @@
+From dd99689b821162293506e0344f163b82349a9298 Mon Sep 17 00:00:00 2001
+From: Xue Liu <liuxue@loongson.cn>
+Date: Sun, 29 Jan 2023 10:22:01 +0800
+Subject: [PATCH 2/6] LoongArch: Optimize string functions strchr, strchrnull.
+
+Change-Id: I8b274972642b6a1926d8fc176404bfd83344bc51
+---
+ sysdeps/loongarch/lp64/strchr.S | 107 +++++++++++++++++++++++++++
+ sysdeps/loongarch/lp64/strchrnul.S | 115 +++++++++++++++++++++++++++++
+ 2 files changed, 222 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/strchr.S
+ create mode 100644 sysdeps/loongarch/lp64/strchrnul.S
+
+diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
+new file mode 100644
+index 00000000..3d64c684
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strchr.S
+@@ -0,0 +1,107 @@
++/* Optimized strchr implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++
++/* char * strchr (const char *s1, int c);  In: a0 = s1, a1 = c.  Out: v0 = address of the first byte equal to the low byte of c, or 0 when the terminating NUL comes first.  The string is scanned one aligned 8-byte word at a time.  */
++LEAF(strchr)
++ .align 6 /* Request 2^6 = 64-byte alignment.  NOTE(review): this follows the LEAF label; confirm against sysdep.h whether padding can land after the entry point.  */
++
++ li.w t4, 0x7 /* t4 = 7, mask for the low three address bits.  */
++ lu12i.w a2, 0x01010 /* a2 = 0x01010000.  */
++ bstrins.d a1, a1, 15, 8 /* Copy bits 7:0 of c into bits 15:8.  */
++ andi t0, a0, 0x7 /* t0 = s1 & 7: byte offset of s1 inside its 8-byte word.  */
++
++ ori a2, a2, 0x101 /* a2 = 0x01010101.  */
++ andn t4, a0, t4 /* t4 = s1 & ~7: address of the enclosing aligned word.  */
++ slli.w t1, t0, 3 /* t1 = that offset expressed in bits.  */
++
++ ld.d t4, t4, 0 /* Load the aligned word that holds the first byte of s1.  */
++
++ nor t8, zero, zero /* t8 = all ones.  */
++ bstrins.d a1, a1, 31, 16 /* c now fills the low four bytes.  */
++ srl.d t4, t4, t1 /* Shift out the bytes that precede s1; lane 0 is now s1[0].  */
++
++ bstrins.d a1, a1, 63, 32 /* a1 = low byte of c replicated into all eight lanes.  */
++ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101.  */
++ srl.d a7, t8, t1 /* a7 = ones in the lanes that hold real string bytes.  */
++
++ li.w t1, 8 /* t1 = 8, bytes per word.  */
++ nor t8, a7, zero /* t8 = ~a7: ones in the lanes vacated by the shift.  */
++ slli.d a3, a2, 7 /* a3 = 0x8080808080808080.  */
++ or t5, t8, t4 /* Force the vacated lanes to 0xff so they cannot look like NUL.  */
++ and t3, a7, a1 /* Keep the c pattern in valid lanes only; vacated lanes become 0.  */
++
++ sub.w t1, t1, t0 /* t1 = 8 - offset: distance from s1 to the next aligned word.  */
++ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f.  */
++ xor t2, t5, t3 /* Lanes equal to c become 0; vacated lanes stay 0xff.  */
++ sub.d a7, t5, a2 /* Zero-byte test on the data, part 1: x - 0x0101...  */
++ nor a6, t5, a3 /* Part 2: ~x & 0x8080...  */
++
++ sub.d a5, t2, a2 /* Same zero-byte test applied to the xor result.  */
++ nor a4, t2, a3 /* a4 = ~t2 & 0x8080...  */
++
++ and a6, a7, a6 /* a6 != 0 iff the word contains a NUL byte.  */
++ and a5, a5, a4 /* a5 != 0 iff the word contains a byte equal to c.  */
++ or a7, a6, a5 /* Either kind of hit ends the search.  */
++ bnez a7, L(_mc8_a) /* Hit in the first (partial) word; a0 is still s1.  */
++
++ add.d a0, a0, t1 /* Advance a0 to the next 8-byte boundary.  */
++L(_aloop): /* Main loop: two aligned words per iteration.  */
++ ld.d t4, a0, 0 /* First word of the pair.  */
++
++ xor t2, t4, a1 /* Lanes equal to c become 0.  */
++ sub.d a7, t4, a2 /* Zero-byte test on the data ...  */
++ nor a6, t4, a3
++ sub.d a5, t2, a2 /* ... and on the xor result.  */
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL mask.  */
++ and a5, a5, a4 /* a5 = match mask.  */
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* Hit in the word at a0.  */
++
++ ld.d t4, a0, 8 /* Second word of the pair.  */
++ addi.d a0, a0, 16 /* Pre-advance past both words for the next iteration.  */
++ xor t2, t4, a1
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL mask.  */
++ and a5, a5, a4 /* a5 = match mask.  */
++ or a7, a6, a5
++ beqz a7, L(_aloop) /* Nothing in either word: keep scanning.  */
++
++ addi.d a0, a0, -8 /* Hit in the second word: step a0 back onto it.  */
++L(_mc8_a): /* a0 = base of the word with the hit, a5 = match mask, a6 = NUL mask.  */
++
++ ctz.d t0, a5 /* Bit position of the lowest match flag.  */
++ ctz.d t2, a6 /* Bit position of the lowest NUL flag.  */
++
++ srli.w t0, t0, 3 /* Bits -> byte index of the match.  */
++ srli.w t2, t2, 3 /* Bits -> byte index of the NUL.  */
++ sltu t1, t2, t0 /* t1 = 1 when the NUL comes before the match.  */
++ add.d v0, a0, t0 /* Candidate result: address of the matching byte.  */
++ masknez v0, v0, t1 /* Result becomes 0 when t1 != 0, i.e. c was not found before the NUL.  */
++ jr ra /* Return.  */
++END(strchr)
++
++libc_hidden_builtin_def (strchr)
++weak_alias (strchr, index) /* index is provided as a weak alias of strchr.  */
+diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
+new file mode 100644
+index 00000000..58b8b372
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strchrnul.S
+@@ -0,0 +1,115 @@
++/* Optimized strchrnul implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++
++#define MOVZ(rd,rs,rt) /* rd = (rt == 0) ? rs : rd.  Clobbers t6.  */ \
++ masknez t6, rs, rt;\
++ maskeqz rd, rd, rt;\
++ or rd, rd, t6
++
++/* char *strchrnul(const char *s, int c);  In: a0 = s, a1 = c.  Out: v0 = address of the first byte equal to the low byte of c, or of the terminating NUL if that comes first.  The string is scanned one aligned 8-byte word at a time.  */
++LEAF(__strchrnul)
++ .align 6 /* Request 2^6 = 64-byte alignment.  NOTE(review): this follows the LEAF label; confirm against sysdep.h whether padding can land after the entry point.  */
++
++ li.w t4, 0x7 /* t4 = 7, mask for the low three address bits.  */
++ lu12i.w a2, 0x01010 /* a2 = 0x01010000.  */
++ bstrins.d a1, a1, 15, 8 /* Copy bits 7:0 of c into bits 15:8.  */
++ andi t0, a0, 0x7 /* t0 = s & 7: byte offset of s inside its 8-byte word.  */
++
++ ori a2, a2, 0x101 /* a2 = 0x01010101.  */
++ andn t4, a0, t4 /* t4 = s & ~7: address of the enclosing aligned word.  */
++ slli.w t1, t0, 3 /* t1 = that offset expressed in bits.  */
++ ld.d t4, t4, 0 /* Load the aligned word that holds the first byte of s.  */
++
++ nor t8, zero, zero /* t8 = all ones.  */
++ bstrins.d a1, a1, 31, 16 /* c now fills the low four bytes.  */
++ srl.d t4, t4, t1 /* Shift out the bytes that precede s; lane 0 is now s[0].  */
++
++ preld 0, a0, 32 /* Prefetch hint for the data at s + 32.  */
++ bstrins.d a1, a1, 63, 32 /* a1 = low byte of c replicated into all eight lanes.  */
++ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101.  */
++ srl.d a7, t8, t1 /* a7 = ones in the lanes that hold real string bytes.  */
++
++ nor t8, a7, zero /* t8 = ~a7: ones in the lanes vacated by the shift.  */
++ slli.d a3, a2, 7 /* a3 = 0x8080808080808080.  */
++ or t5, t8, t4 /* Force the vacated lanes to 0xff so they cannot look like NUL.  */
++ and t3, a7, a1 /* Keep the c pattern in valid lanes only; vacated lanes become 0.  */
++
++ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f.  */
++ xor t2, t5, t3 /* Lanes equal to c become 0; vacated lanes stay 0xff.  */
++ sub.d a7, t5, a2 /* Zero-byte test on the data, part 1: x - 0x0101...  */
++ nor a6, t5, a3 /* Part 2: ~x & 0x8080...  */
++
++ li.w t1, 8 /* t1 = 8, bytes per word.  */
++ sub.d a5, t2, a2 /* Same zero-byte test applied to the xor result.  */
++ nor a4, t2, a3 /* a4 = ~t2 & 0x8080...  */
++
++ and a6, a7, a6 /* a6 != 0 iff the word contains a NUL byte.  */
++ and a5, a5, a4 /* a5 != 0 iff the word contains a byte equal to c.  */
++ or a7, a6, a5 /* Either kind of hit ends the search.  */
++ bnez a7, L(_mc8_a) /* Hit in the first (partial) word; a0 is still s.  */
++
++ sub.w t1, t1, t0 /* t1 = 8 - offset: distance from s to the next aligned word.  */
++ add.d a0, a0, t1 /* Advance a0 to the next 8-byte boundary.  */
++L(_aloop): /* Main loop: two aligned words per iteration.  */
++ ld.d t4, a0, 0 /* First word of the pair.  */
++
++ xor t2, t4, a1 /* Lanes equal to c become 0.  */
++ sub.d a7, t4, a2 /* Zero-byte test on the data ...  */
++ nor a6, t4, a3
++ sub.d a5, t2, a2 /* ... and on the xor result.  */
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL mask.  */
++ and a5, a5, a4 /* a5 = match mask.  */
++
++ or a7, a6, a5
++ bnez a7, L(_mc8_a) /* Hit in the word at a0.  */
++
++ ld.d t4, a0, 8 /* Second word of the pair.  */
++ addi.d a0, a0, 16 /* Pre-advance past both words for the next iteration.  */
++
++ xor t2, t4, a1
++ sub.d a7, t4, a2
++ nor a6, t4, a3
++ sub.d a5, t2, a2
++
++ nor a4, t2, a3
++ and a6, a7, a6 /* a6 = NUL mask.  */
++ and a5, a5, a4 /* a5 = match mask.  */
++
++ or a7, a6, a5
++ beqz a7, L(_aloop) /* Nothing in either word: keep scanning.  */
++
++ addi.d a0, a0, -8 /* Hit in the second word: step a0 back onto it.  */
++L(_mc8_a): /* a0 = base of the word with the hit, a5 = match mask, a6 = NUL mask.  */
++ ctz.d t0, a5 /* Bit position of the lowest match flag.  */
++ ctz.d t2, a6 /* Bit position of the lowest NUL flag.  */
++
++ srli.w t0, t0, 3 /* Bits -> byte index of the match.  */
++ srli.w t2, t2, 3 /* Bits -> byte index of the NUL.  */
++ slt t1, t0, t2 /* t1 = 1 when the match comes before the NUL.  */
++
++ MOVZ(t0,t2,t1) /* t0 = t1 ? t0 : t2, i.e. the smaller of the two indexes.  */
++
++ add.d v0, a0, t0 /* Result: address of the match or of the NUL.  */
++ jr ra /* Return.  */
++END(__strchrnul)
++
++weak_alias(__strchrnul, strchrnul) /* strchrnul is provided as a weak alias of __strchrnul.  */
+-- 
+2.33.0
+
|