From dd99689b821162293506e0344f163b82349a9298 Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Sun, 29 Jan 2023 10:22:01 +0800 Subject: [PATCH 2/6] LoongArch: Optimize string functions strchr, strchrnull. Change-Id: I8b274972642b6a1926d8fc176404bfd83344bc51 --- sysdeps/loongarch/lp64/strchr.S | 107 +++++++++++++++++++++++++++ sysdeps/loongarch/lp64/strchrnul.S | 115 +++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 sysdeps/loongarch/lp64/strchr.S create mode 100644 sysdeps/loongarch/lp64/strchrnul.S diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S new file mode 100644 index 00000000..3d64c684 --- /dev/null +++ b/sysdeps/loongarch/lp64/strchr.S @@ -0,0 +1,107 @@ +/* Optimized strchr implementation for LoongArch. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +/* char * strchr (const char *s1, int c); */ +LEAF(strchr) + .align 6 + + li.w t4, 0x7 + lu12i.w a2, 0x01010 + bstrins.d a1, a1, 15, 8 + andi t0, a0, 0x7 + + ori a2, a2, 0x101 + andn t4, a0, t4 + slli.w t1, t0, 3 + + ld.d t4, t4, 0 + + nor t8, zero, zero + bstrins.d a1, a1, 31, 16 + srl.d t4, t4, t1 + + bstrins.d a1, a1, 63, 32 + bstrins.d a2, a2, 63, 32 + srl.d a7, t8, t1 + + li.w t1, 8 + nor t8, a7, zero + slli.d a3, a2, 7 + or t5, t8, t4 + and t3, a7, a1 + + sub.w t1, t1, t0 + nor a3, a3, zero + xor t2, t5, t3 + sub.d a7, t5, a2 + nor a6, t5, a3 + + sub.d a5, t2, a2 + nor a4, t2, a3 + + and a6, a7, a6 + and a5, a5, a4 + or a7, a6, a5 + bnez a7, L(_mc8_a) + + add.d a0, a0, t1 +L(_aloop): + ld.d t4, a0, 0 + + xor t2, t4, a1 + sub.d a7, t4, a2 + nor a6, t4, a3 + sub.d a5, t2, a2 + + nor a4, t2, a3 + and a6, a7, a6 + and a5, a5, a4 + or a7, a6, a5 + bnez a7, L(_mc8_a) + + ld.d t4, a0, 8 + addi.d a0, a0, 16 + xor t2, t4, a1 + sub.d a7, t4, a2 + nor a6, t4, a3 + sub.d a5, t2, a2 + + nor a4, t2, a3 + and a6, a7, a6 + and a5, a5, a4 + or a7, a6, a5 + beqz a7, L(_aloop) + + addi.d a0, a0, -8 +L(_mc8_a): + + ctz.d t0, a5 + ctz.d t2, a6 + + srli.w t0, t0, 3 + srli.w t2, t2, 3 + sltu t1, t2, t0 + add.d v0, a0, t0 + masknez v0, v0, t1 + jr ra +END(strchr) + +libc_hidden_builtin_def (strchr) +weak_alias (strchr, index) diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S new file mode 100644 index 00000000..58b8b372 --- /dev/null +++ b/sysdeps/loongarch/lp64/strchrnul.S @@ -0,0 +1,115 @@ +/* Optimized strchrnul implementation for LoongArch. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +#define MOVZ(rd,rs,rt) \ + masknez t6, rs, rt;\ + maskeqz rd, rd, rt;\ + or rd, rd, t6 + +/* char *strchrnul(const char *s, int c); */ +LEAF(__strchrnul) + .align 6 + + li.w t4, 0x7 + lu12i.w a2, 0x01010 + bstrins.d a1, a1, 15, 8 + andi t0, a0, 0x7 + + ori a2, a2, 0x101 + andn t4, a0, t4 + slli.w t1, t0, 3 + ld.d t4, t4, 0 + + nor t8, zero, zero + bstrins.d a1, a1, 31, 16 + srl.d t4, t4, t1 + + preld 0, a0, 32 + bstrins.d a1, a1, 63, 32 + bstrins.d a2, a2, 63, 32 + srl.d a7, t8, t1 + + nor t8, a7, zero + slli.d a3, a2, 7 + or t5, t8, t4 + and t3, a7, a1 + + nor a3, a3, zero + xor t2, t5, t3 + sub.d a7, t5, a2 + nor a6, t5, a3 + + li.w t1, 8 + sub.d a5, t2, a2 + nor a4, t2, a3 + + and a6, a7, a6 + and a5, a5, a4 + or a7, a6, a5 + bnez a7, L(_mc8_a) + + sub.w t1, t1, t0 + add.d a0, a0, t1 +L(_aloop): + ld.d t4, a0, 0 + + xor t2, t4, a1 + sub.d a7, t4, a2 + nor a6, t4, a3 + sub.d a5, t2, a2 + + nor a4, t2, a3 + and a6, a7, a6 + and a5, a5, a4 + + or a7, a6, a5 + bnez a7, L(_mc8_a) + + ld.d t4, a0, 8 + addi.d a0, a0, 16 + + xor t2, t4, a1 + sub.d a7, t4, a2 + nor a6, t4, a3 + sub.d a5, t2, a2 + + nor a4, t2, a3 + and a6, a7, a6 + and a5, a5, a4 + + or a7, a6, a5 + beqz a7, L(_aloop) + + addi.d a0, a0, -8 +L(_mc8_a): + ctz.d t0, a5 + ctz.d t2, a6 + + srli.w t0, t0, 3 + srli.w t2, t2, 3 + slt t1, t0, t2 + + MOVZ(t0,t2,t1) + + add.d v0, a0, t0 + jr ra +END(__strchrnul) + +weak_alias(__strchrnul, strchrnul) -- 2.33.0