From 86290dbec63a9688ab0e0085ab8ab686fa256f18 Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Sun, 29 Jan 2023 10:25:47 +0800 Subject: [PATCH 6/6] LoongArch: Optimize string functions strlen, strnlen. Change-Id: I5df3398f9dbd9ea72c3de14e1e5f7793f6dbd794 --- sysdeps/loongarch/lp64/strlen.S | 102 +++++++++++++++++++++++++ sysdeps/loongarch/lp64/strnlen.S | 125 +++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 sysdeps/loongarch/lp64/strlen.S create mode 100644 sysdeps/loongarch/lp64/strnlen.S diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S new file mode 100644 index 00000000..3569598c --- /dev/null +++ b/sysdeps/loongarch/lp64/strlen.S @@ -0,0 +1,102 @@ +/* Optimized strlen implementation for LoongArch. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>.
*/
+
+#include  /* NOTE(review): the header name after #include is missing, presumably lost in extraction; restore it from the original patch.  */
+#include  /* NOTE(review): header name missing here as well.  */
+
+/* size_t strlen (const char *s1);  Word-at-a-time scan: one aligned load of the word containing s1 with the bytes before s1 forced to 0xff, then two aligned 8-byte words per iteration using a cheap test, switching to an exact zero-byte test once a byte with the high bit set trips the cheap one.  */
+LEAF(strlen)
+    .align 5
+
+    nor t4, zero, zero  /* t4 = all ones */
+    lu12i.w a2, 0x01010  /* a2 = 0x01010000 (immediate placed at bit 12) */
+    andi t5, a0, 0x7  /* t5 = s1 & 7, byte offset of s1 inside its 8-byte word */
+
+    li.w t7, 0x7
+    slli.d t6, t5, 0x3  /* t6 = that offset in bits */
+    andn t7, a0, t7  /* t7 = s1 & ~7, the enclosing aligned address */
+    ld.d a1, t7, 0  /* a1 = aligned word that contains s1[0] */
+    sub.d t7, zero, t6  /* t7 = -t6 */
+    sll.d t4, t4, t7  /* all ones shifted left by (64 - t6); relies on the shift using only the low 6 bits of the count */
+    maskeqz t4, t4, t6  /* offset 0: t4 becomes 0, nothing to fill */
+    srl.d a1, a1, t6  /* shift the bytes that precede s1 out of the low end (little-endian layout assumed by the srl/ctz usage) */
+    or a1, a1, t4  /* fill the vacated high bytes with 0xff so they cannot look like a NUL */
+
+
+    ori a2, a2, 0x101  /* a2 = 0x01010101 */
+    nor t1, a1, zero  /* NOTE(review): this t1 is overwritten by "nor t1, a1, a3" below before it is read */
+    li.w a4, 8
+
+    bstrins.d a2, a2, 63, 32  /* copy the low 32 bits into the high half: a2 = 0x0101010101010101 */
+    sub.d a5, a4, t5  /* a5 = 8 - offset, the number of string bytes covered by the first word */
+    move t5, a0  /* t5 = original s1, kept for the final pointer subtraction */
+
+    sub.d t0, a1, a2  /* t0 = word - 0x01.. */
+    slli.d t4, a2, 7  /* t4 = 0x8080808080808080 */
+    nor a3, zero, t4  /* a3 = 0x7f7f7f7f7f7f7f7f */
+    nor t1, a1, a3  /* t1 = ~(word | 0x7f..), i.e. ~word & 0x80.. */
+
+    and t0, t0, t1  /* exact test: nonzero iff the word holds a zero byte; lowest set bit marks the first one */
+    bnez t0, strlen_count1  /* NUL in the first word; a0 still equals s1 here */
+    add.d a0, a0, a5  /* advance a0 to the next 8-byte boundary */
+strlen_loop:  /* fast loop: two aligned words per iteration, cheap test only */
+    ld.d a1, a0, 0
+    sub.d t0, a1, a2
+    and t1, t0, t4  /* cheap test (word - 0x01..) & 0x80..; also fires for bytes with the high bit set */
+    bnez t1, strlen_count_pre  /* candidate in the first word; a0 already points at it */
+    ld.d a1, a0, 8
+    sub.d t0, a1, a2
+    and t1, t0, t4
+    addi.d a0, a0, 16
+    beqz t1, strlen_loop
+strlen_count:
+    addi.d a0, a0, -8  /* candidate was in the second word: point a0 back at it */
+strlen_count_pre:
+    nor t1, a1, a3
+    and t0, t0, t1  /* redo the candidate word with the exact test */
+    beqz t0, strlen_noascii_start  /* false positive (no zero byte): continue in the exact loop */
+strlen_count1:  /* t0 = zero-byte markers, a0 = address of the word they belong to */
+    ctz.d t1, t0  /* bit index of the first marker = 8 * byte index + 7 */
+    sub.d v0, a0, t5  /* bytes from s1 to that word; v0 is the result register (presumably an alias of a0 -- confirm in the register definitions) */
+    srli.w t1, t1, 3  /* bit index -> byte index */
+    add.d v0, v0, t1
+    jr ra
+strlen_noascii_start:
+    addi.d a0, a0, 8  /* skip the word that was just cleared by the exact test */
+strlen_loop_noascii:  /* same two-word loop, but every word gets the exact test */
+    ld.d a1, a0, 0
+    sub.d t0, a1, a2
+    nor t1, a1, a3
+    and t0, t0, t1
+    bnez t0, strlen_count1  /* NUL in the first word; a0 points at it */
+    ld.d a1, a0, 8
+    sub.d t0, a1, a2
+    nor t1, a1, a3
+    and t0, t0, t1
+    addi.d a0, a0, 16
+    beqz t0, strlen_loop_noascii
+    addi.d a0, a0, -8  /* NUL in the second word: point a0 back at it */
+    ctz.d t1, t0  /* same length computation as at strlen_count1 */
+    sub.d v0, a0, t5
+    srli.w t1, t1, 3
+    add.d v0, v0, t1
+    jr ra
+END(strlen)
+
+libc_hidden_builtin_def (strlen)
+
diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S
new file mode 100644
index 00000000..8eaa60e2
--- /dev/null
+++ b/sysdeps/loongarch/lp64/strnlen.S
@@ -0,0 +1,125 @@
+/* Optimized strnlen implementation for LoongArch.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   .  */
+
+#include  /* NOTE(review): the header name after #include is missing, presumably lost in extraction; restore it from the original patch.  */
+
+/* rd <- if rc then ra else rb
+   a5 will be destroyed; a5 is also tmp4 below, so tmp4 must not be live across a use of this macro.  */
+#define CONDITIONSEL(rd, ra, rb, rc)\
+    masknez a5, rb, rc;\
+    maskeqz rd, ra, rc;\
+    or rd, rd, a5
+
+/* Parameters and Results */
+#define srcin a0  /* s1, start of the string */
+#define limit a1  /* maxlen */
+#define len v0  /* returned length */
+
+/* Internal variable */
+#define data1 t0  /* first 8-byte word of the current 16-byte block */
+#define data2 t1  /* second 8-byte word of the current 16-byte block */
+#define has_nul1 t2  /* zero-byte markers for data1 */
+#define has_nul2 t3  /* zero-byte markers for data2 */
+#define src t4  /* read pointer, always 16-byte aligned */
+#define zeroones t5  /* 0x0101010101010101 */
+#define sevenf t6  /* 0x7f7f7f7f7f7f7f7f */
+#define data2a t7  /* data2 with its leading bytes masked (misaligned entry only) */
+#define tmp6 t7  /* same register as data2a; not referenced in the code below */
+#define pos t8  /* byte index of the NUL inside its word */
+#define tmp1 a2
+#define tmp2 a3
+#define tmp3 a4
+#define tmp4 a5  /* clobbered by CONDITIONSEL */
+#define tmp5 a6
+#define limit_wd a7  /* 16-byte blocks still allowed, minus one; negative once the limit is used up */
+
+/* size_t strnlen (const char *s1,size_t maxlen);  Scans 16 aligned bytes per iteration for a zero byte while counting down the blocks permitted by maxlen, and returns the smaller of the NUL position and maxlen.  */
+LEAF(__strnlen)
+    .align 4
+    beqz limit, _hit_limit  /* maxlen == 0: return limit, i.e. 0 */
+    lu12i.w zeroones, 0x01010
+    lu12i.w sevenf, 0x7f7f7
+    ori zeroones, zeroones, 0x101  /* low 32 bits = 0x01010101 */
+    ori sevenf, sevenf, 0xf7f  /* low 32 bits = 0x7f7f7f7f */
+    bstrins.d zeroones, zeroones, 63, 32  /* replicate into the high half: 0x0101010101010101 */
+    bstrins.d sevenf, sevenf, 63, 32  /* replicate into the high half: 0x7f7f7f7f7f7f7f7f */
+    andi tmp1, srcin, 15  /* tmp1 = misalignment of s1 within 16 bytes */
+    sub.d src, srcin, tmp1  /* src = s1 rounded down to a 16-byte boundary */
+    bnez tmp1, misaligned
+    addi.d limit_wd, limit, -1  /* limit is nonzero here, so this cannot wrap */
+    srli.d limit_wd, limit_wd, 4  /* limit_wd = (limit - 1) / 16, index of the last block that may be examined */
+_loop:
+    ld.d data1, src, 0
+    ld.d data2, src, 8
+    addi.d src, src, 16  /* src now points just past the block that was loaded */
+_realigned:
+    sub.d tmp1, data1, zeroones
+    or tmp2, data1, sevenf
+    sub.d tmp3, data2, zeroones
+    or tmp4, data2, sevenf
+    andn has_nul1, tmp1, tmp2  /* (x - 0x01..) & ~(x | 0x7f..): nonzero iff data1 holds a zero byte */
+    andn has_nul2, tmp3, tmp4  /* same test for data2 */
+    addi.d limit_wd, limit_wd, -1
+    srli.d tmp1, limit_wd, 63  /* tmp1 = sign bit: 1 once the permitted blocks are used up */
+    or tmp2, has_nul1, has_nul2  /* tmp2 != 0 iff this block holds a NUL */
+    or tmp3, tmp1, tmp2
+    beqz tmp3, _loop  /* no NUL and blocks remain: keep scanning */
+    beqz tmp2, _hit_limit  /* stopped only because the limit ran out */
+    sub.d len, src, srcin  /* bytes from s1 to the end of this block; srcin is not read after this point */
+    beqz has_nul1, _nul_in_data2
+    move has_nul2, has_nul1  /* NUL is in data1: use its markers ... */
+    addi.d len, len, -8  /* ... and step back over data2 */
+_nul_in_data2:
+    ctz.d pos, has_nul2  /* bit index of the first marker */
+    srli.d pos, pos, 3  /* -> byte index of the NUL inside its word */
+    addi.d len, len, -8  /* back to the start of the word that holds the NUL */
+    add.d len, len, pos
+    sltu tmp1, len, limit  /* tmp1 = (len < limit) */
+    CONDITIONSEL(len, len, limit, tmp1)  /* result = min (len, limit) */
+    jr ra
+
+misaligned:  /* entered with tmp1 = s1 & 15, nonzero */
+    addi.d limit_wd, limit, -1
+    sub.d tmp4, zero, tmp1  /* tmp4 = -tmp1 */
+    andi tmp3, limit_wd, 15  /* tmp3 = (limit - 1) % 16 */
+    srli.d limit_wd, limit_wd, 4  /* limit_wd = (limit - 1) / 16 */
+    li.d tmp5, -1  /* tmp5 = all ones */
+    ld.d data1, src, 0
+    ld.d data2, src, 8
+    addi.d src, src, 16
+    slli.d tmp4, tmp4, 3  /* -tmp1 converted from bytes to bits */
+    add.d tmp3, tmp3, tmp1  /* (limit - 1) % 16 + misalignment, at most 30 */
+    srl.d tmp2, tmp5, tmp4  /* 0xff in the low (tmp1 % 8) bytes, or all ones when tmp1 == 8; relies on the shift using only the low 6 bits of the count */
+    srli.d tmp3, tmp3, 4  /* carry into the block count */
+    add.d limit_wd, limit_wd, tmp3  /* limit_wd = (limit - 1 + tmp1) / 16, computed without overflowing for huge limits */
+    or data1, data1, tmp2  /* candidate for misalignment <= 8: bytes before s1 in data1 forced to 0xff */
+    or data2a, data2, tmp2  /* candidate for misalignment > 8: bytes before s1 in data2 forced to 0xff */
+    li.w tmp3, 9
+    sltu tmp1, tmp1, tmp3  /* tmp1 = (misalignment <= 8) */
+    CONDITIONSEL(data1, data1, tmp5, tmp1)  /* misalignment > 8: all of data1 precedes s1, so make it all ones */
+    CONDITIONSEL(data2, data2, data2a, tmp1)  /* misalignment > 8: use the masked data2 */
+    b _realigned
+
+_hit_limit:
+    move len, limit  /* no NUL within maxlen bytes (or maxlen == 0): return maxlen */
+    jr ra
+END(__strnlen)
+
+weak_alias (__strnlen, strnlen)
+libc_hidden_def (strnlen)
+libc_hidden_def (__strnlen)
-- 
2.33.0