diff options
author | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
commit | d20db0561a6a36f914fde030512503b114ef9a0c (patch) | |
tree | d4e5e3494d95c269a1cee6195f11bf3201bcadbf /6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch | |
parent | 016343d99b1b269d7246ef1e143d4b54914433d4 (diff) |
automatic import of glibc (branches: openeuler22.03_LTS_SP4, openeuler22.03_LTS_SP3, openeuler20.03)
Diffstat (limited to '6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch')
-rw-r--r-- | 6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch b/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch new file mode 100644 index 0000000..0c1af11 --- /dev/null +++ b/6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch @@ -0,0 +1,255 @@ +From 86290dbec63a9688ab0e0085ab8ab686fa256f18 Mon Sep 17 00:00:00 2001 +From: Xue Liu <liuxue@loongson.cn> +Date: Sun, 29 Jan 2023 10:25:47 +0800 +Subject: [PATCH 6/6] LoongArch: Optimize string functions strlen, strnlen. + +Change-Id: I5df3398f9dbd9ea72c3de14e1e5f7793f6dbd794 +--- + sysdeps/loongarch/lp64/strlen.S | 102 +++++++++++++++++++++++++ + sysdeps/loongarch/lp64/strnlen.S | 125 +++++++++++++++++++++++++++++++ + 2 files changed, 227 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/strlen.S + create mode 100644 sysdeps/loongarch/lp64/strnlen.S + +diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S +new file mode 100644 +index 00000000..3569598c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/strlen.S +@@ -0,0 +1,102 @@ ++/* Optimized strlen implementation for LoongArch. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/asm.h> ++ ++/* size_t strlen (const char *s1);  Returns the number of bytes before the first NUL byte of s1.  The string is read one aligned 8-byte word at a time: the first word is taken from the 8-byte boundary at or below s1 with the bytes in front of s1 discarded, then a cheap two-words-per-iteration test runs until it flags a candidate word, and an exact test decides whether that word really holds a NUL.  The code treats the least significant byte of a loaded word as the byte at the lowest address.  */ ++LEAF(strlen) ++ .align 5 ++ ++ nor t4, zero, zero /* t4 = all ones */ ++ lu12i.w a2, 0x01010 /* start building 0x0101010101010101 in a2 */ ++ andi t5, a0, 0x7 /* t5 = s1 & 7, byte offset of s1 inside its aligned word */ ++ ++ li.w t7, 0x7 ++ slli.d t6, t5, 0x3 /* t6 = the same offset counted in bits */ ++ andn t7, a0, t7 /* t7 = s1 rounded down to an 8-byte boundary */ ++ ld.d a1, t7, 0 /* a1 = aligned word that contains the first string byte */ ++ sub.d t7, zero, t6 /* t7 = -t6, shift count for the fill mask */ ++ sll.d t4, t4, t7 /* ones in the top t6 bits, assuming sll.d takes the count modulo 64 */ ++ maskeqz t4, t4, t6 /* fill mask becomes 0 when s1 is already aligned (t6 == 0) */ ++ srl.d a1, a1, t6 /* shift out the bytes that precede s1 */ ++ or a1, a1, t4 /* set the vacated top bytes to 0xff so they cannot be taken for a NUL */ ++ ++ ++ ori a2, a2, 0x101 /* a2 = 0x01010101 */ ++ nor t1, a1, zero /* NOTE(review): t1 is rewritten by "nor t1, a1, a3" below before it is read, so this result looks unused */ ++ li.w a4, 8 ++ ++ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */ ++ sub.d a5, a4, t5 /* a5 = 8 - offset, distance from s1 to the next aligned word */ ++ move t5, a0 /* t5 = original s1, needed for the final subtraction */ ++ ++ sub.d t0, a1, a2 /* t0 = word - 0x01..01 */ ++ slli.d t4, a2, 7 /* t4 = 0x8080808080808080 */ ++ nor a3, zero, t4 /* a3 = 0x7f7f7f7f7f7f7f7f */ ++ nor t1, a1, a3 /* t1 = ~word & 0x80..80 */ ++ ++ and t0, t0, t1 /* exact test: nonzero exactly when the word holds a zero byte; the lowest set bit belongs to the first one */ ++ bnez t0, strlen_count1 /* NUL already in the first word; a0 still equals s1 here */ ++ add.d a0, a0, a5 /* a0 = next 8-byte boundary after s1 */ ++strlen_loop: /* cheap loop: checks only (word - 0x01..01) & 0x80..80, which is set for every zero byte but can also be set by bytes of 0x81 and above */ ++ ld.d a1, a0, 0 ++ sub.d t0, a1, a2 ++ and t1, t0, t4 ++ bnez t1, strlen_count_pre /* candidate in the word at a0 */ ++ ld.d a1, a0, 8 ++ sub.d t0, a1, a2 ++ and t1, t0, t4 ++ addi.d a0, a0, 16 ++ beqz t1, strlen_loop ++strlen_count: /* fall-through: the candidate is in the second word of the pair */ ++ addi.d a0, a0, -8 /* point a0 back at that second word */ ++strlen_count_pre: /* run the exact test on the candidate word */ ++ nor t1, a1, a3 ++ and t0, t0, t1 ++ beqz t0, strlen_noascii_start /* no zero byte after all: switch to the exact loop */ ++strlen_count1: ++ ctz.d t1, t0 /* bit position of the lowest flag */ ++ sub.d v0, a0, t5 /* bytes between s1 and the word being examined */ ++ srli.w t1, t1, 3 /* bit position to byte index */ ++ add.d v0, v0, t1 /* result = word offset + byte index */ ++ jr ra ++strlen_noascii_start: ++ addi.d a0, a0, 8 /* step past the word that was just cleared */ ++strlen_loop_noascii: /* exact loop, two words per iteration */ ++ ld.d a1, a0, 0 ++ sub.d t0, a1, a2 ++ nor t1, a1, a3 ++ and t0, t0, t1 ++ bnez t0, strlen_count1 /* zero byte in the word at a0 */ ++ ld.d a1, a0, 8 ++ sub.d t0, a1, a2 ++ nor t1, a1, a3 ++ and t0, t0, t1 ++ addi.d a0, a0, 16 ++ beqz t0, strlen_loop_noascii ++ addi.d a0, a0, -8 /* zero byte in the second word: point a0 at it */ ++ ctz.d t1, t0 /* same length computation as at strlen_count1 */ ++ sub.d v0, a0, t5 ++ srli.w t1, t1, 3 ++ add.d v0, v0, t1 ++ jr ra ++END(strlen) ++ ++libc_hidden_builtin_def (strlen) ++ +diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S +new file mode 100644 +index 00000000..8eaa60e2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/strnlen.S +@@ -0,0 +1,125 @@ /* NOTE(review): the header comment that follows says "strlen" although this hunk creates strnlen.S. */ ++/* Optimized strlen implementation for LoongArch. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sys/asm.h> ++ ++/* rd <- if rc then ra else rb ++ a5 will be destroyed.  masknez yields rb only when rc is zero, maskeqz yields ra only when rc is nonzero, and the final or merges the two.  */ ++#define CONDITIONSEL(rd, ra, rb, rc)\ ++ masknez a5, rb, rc;\ ++ maskeqz rd, ra, rc;\ ++ or rd, rd, a5 ++ ++/* Parameters and Results */ ++#define srcin a0 /* s1 */ ++#define limit a1 /* maxlen */ ++#define len v0 /* value returned to the caller */ ++ ++/* Internal variable */ ++#define data1 t0 /* first 8-byte word of the current 16-byte block */ ++#define data2 t1 /* second 8-byte word of the block */ ++#define has_nul1 t2 /* zero-byte flags for data1 */ ++#define has_nul2 t3 /* zero-byte flags for data2 */ ++#define src t4 /* running block pointer */ ++#define zeroones t5 /* 0x0101010101010101 */ ++#define sevenf t6 /* 0x7f7f7f7f7f7f7f7f */ ++#define data2a t7 /* masked copy of data2 used on the misaligned path */ ++#define tmp6 t7 /* shares t7 with data2a; not referenced in the code shown here */ ++#define pos t8 ++#define tmp1 a2 ++#define tmp2 a3 ++#define tmp3 a4 ++#define tmp4 a5 /* same register that CONDITIONSEL overwrites */ ++#define tmp5 a6 ++#define limit_wd a7 /* number of 16-byte blocks that may still be examined, minus one */ ++ ++/* size_t strnlen (const char *s1,size_t maxlen);  Returns the number of bytes before the first NUL byte of s1, capped at maxlen.  The string is scanned in 16-byte-aligned blocks of two 8-byte words; limit_wd counts the blocks that may still be examined.  */ ++LEAF(__strnlen) ++ .align 4 ++ beqz limit, _hit_limit /* maxlen == 0: return 0 through _hit_limit */ ++ lu12i.w zeroones, 0x01010 ++ lu12i.w sevenf, 0x7f7f7 ++ ori zeroones, zeroones, 0x101 /* zeroones = 0x01010101 */ ++ ori sevenf, sevenf, 0xf7f /* sevenf = 0x7f7f7f7f */ ++ bstrins.d zeroones, zeroones, 63, 32 /* copy the low half into the high half: 0x0101010101010101 */ ++ bstrins.d sevenf, sevenf, 63, 32 /* likewise: 0x7f7f7f7f7f7f7f7f */ ++ andi tmp1, srcin, 15 /* tmp1 = s1 & 15, offset of s1 inside its 16-byte block */ ++ sub.d src, srcin, tmp1 /* src = s1 rounded down to a 16-byte boundary */ ++ bnez tmp1, misaligned ++ addi.d limit_wd, limit, -1 ++ srli.d limit_wd, limit_wd, 4 /* limit_wd = (maxlen - 1) / 16 */ ++_loop: ++ ld.d data1, src, 0 ++ ld.d data2, src, 8 ++ addi.d src, src, 16 /* src now points past the block just loaded */ ++_realigned: /* zero-byte test per word: (w - 0x01..01) & ~(w | 0x7f..7f) is nonzero exactly when w contains a zero byte */ ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ sub.d tmp3, data2, zeroones ++ or tmp4, data2, sevenf ++ andn has_nul1, tmp1, tmp2 /* flags for data1 */ ++ andn 
has_nul2, tmp3, tmp4 /* flags for data2 */ ++ addi.d limit_wd, limit_wd, -1 /* one more block consumed */ ++ srli.d tmp1, limit_wd, 63 /* tmp1 = sign bit of limit_wd: 1 once the block budget is used up */ ++ or tmp2, has_nul1, has_nul2 /* tmp2 != 0 if either word holds a zero byte */ ++ or tmp3, tmp1, tmp2 ++ beqz tmp3, _loop /* nothing found and blocks remain: keep scanning */ ++ beqz tmp2, _hit_limit /* budget exhausted without a zero byte */ ++ sub.d len, src, srcin /* len = offset of the end of this block from s1 (src has already moved past it) */ ++ beqz has_nul1, _nul_in_data2 ++ move has_nul2, has_nul1 /* first zero byte is in data1: use its flags */ ++ addi.d len, len, -8 /* and step back over data2 */ ++_nul_in_data2: ++ ctz.d pos, has_nul2 /* bit position of the lowest flag */ ++ srli.d pos, pos, 3 /* bit position to byte index */ ++ addi.d len, len, -8 /* back to the start of the word holding the zero byte */ ++ add.d len, len, pos ++ sltu tmp1, len, limit /* tmp1 = 1 when len < maxlen (unsigned) */ ++ CONDITIONSEL(len, len, limit, tmp1) /* len = len < maxlen ? len : maxlen, so a zero byte found beyond the limit inside the last block is not counted */ ++ jr ra ++ ++misaligned: /* s1 is not 16-byte aligned; tmp1 holds s1 & 15 (1..15) */ ++ addi.d limit_wd, limit, -1 ++ sub.d tmp4, zero, tmp1 /* tmp4 = -offset */ ++ andi tmp3, limit_wd, 15 /* tmp3 = (maxlen - 1) & 15 */ ++ srli.d limit_wd, limit_wd, 4 /* limit_wd = (maxlen - 1) / 16 */ ++ li.d tmp5, -1 /* tmp5 = all ones */ ++ ld.d data1, src, 0 ++ ld.d data2, src, 8 ++ addi.d src, src, 16 ++ slli.d tmp4, tmp4, 3 /* -offset counted in bits, used as a shift count */ ++ add.d tmp3, tmp3, tmp1 ++ srl.d tmp2, tmp5, tmp4 /* tmp2 = ones over the low (offset mod 8) bytes, or all ones when offset == 8, assuming srl.d takes the count modulo 64 */ ++ srli.d tmp3, tmp3, 4 /* 1 if the leading offset pushes the limit into one more block, else 0 */ ++ add.d limit_wd, limit_wd, tmp3 /* limit_wd = (offset + maxlen - 1) / 16 */ ++ or data1, data1, tmp2 /* make the bytes before s1 nonzero */ ++ or data2a, data2, tmp2 ++ li.w tmp3, 9 ++ sltu tmp1, tmp1, tmp3 /* tmp1 = 1 when offset <= 8 */ ++ CONDITIONSEL(data1, data1, tmp5, tmp1) /* offset > 8: all of data1 precedes s1, replace it with all ones */ ++ CONDITIONSEL(data2, data2, data2a, tmp1) /* offset > 8: use the masked data2; otherwise keep data2 as loaded */ ++ b _realigned ++ ++_hit_limit: ++ move len, limit /* no zero byte within maxlen bytes: return maxlen */ ++ jr ra ++END(__strnlen) ++ ++weak_alias (__strnlen, strnlen) ++libc_hidden_def (strnlen) ++libc_hidden_def (__strnlen) +-- +2.33.0 + |