diff options
author | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
commit | d20db0561a6a36f914fde030512503b114ef9a0c (patch) | |
tree | d4e5e3494d95c269a1cee6195f11bf3201bcadbf /4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch | |
parent | 016343d99b1b269d7246ef1e143d4b54914433d4 (diff) |
automatic import of glibc openeuler22.03_LTS_SP4 openeuler22.03_LTS_SP3 openeuler20.03
Diffstat (limited to '4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch')
-rw-r--r-- | 4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch b/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch new file mode 100644 index 0000000..5e0ce7d --- /dev/null +++ b/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch @@ -0,0 +1,414 @@ +From 3f3b70e39a529369e4b2936f35034215a45436a3 Mon Sep 17 00:00:00 2001 +From: Xue Liu <liuxue@loongson.cn> +Date: Sun, 29 Jan 2023 10:23:50 +0800 +Subject: [PATCH 4/6] LoongArch: Optimize string functions strcmp, strncmp. + +Change-Id: I436138a312e8ebb668223cafef84fd74dcde72fd +--- + sysdeps/loongarch/lp64/strcmp.S | 161 ++++++++++++++++++++++ + sysdeps/loongarch/lp64/strncmp.S | 225 +++++++++++++++++++++++++++++++ + 2 files changed, 386 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/strcmp.S + create mode 100644 sysdeps/loongarch/lp64/strncmp.S + +diff --git a/sysdeps/loongarch/lp64/strcmp.S b/sysdeps/loongarch/lp64/strcmp.S +new file mode 100644 +index 00000000..0f7a6d55 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/strcmp.S +@@ -0,0 +1,161 @@ ++/* Optimized strcmp implementation for LoongArch. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sys/asm.h> ++ ++/* Parameters and Results */ ++#define src1 a0 ++#define src2 a1 ++#define result v0 ++ ++/* Internal variables */ ++#define src1_off a2 ++#define src2_off a3 ++#define data1 t0 ++#define data2 t1 ++#define has_nul t2 ++#define diff t3 ++#define syndrome t4 ++#define zeroones t5 ++#define sevenf t6 ++#define pos t7 ++#define exchange t8 ++#define tmp1 a4 ++#define tmp2 a5 ++#define tmp3 a6 ++#define tmp4 a7 ++ ++/* rd <- (rc != 0) ? ra : rb ++ tmp3 is clobbered as scratch */ ++#define CONDITIONSEL(rd, rc, ra, rb)\ ++ masknez tmp3, rb, rc;\ ++ maskeqz rd, ra, rc;\ ++ or rd, rd, tmp3 ++ ++LEAF(strcmp) ++ .align 4 ++ ++ xor tmp1, src1, src2 ++ lu12i.w zeroones, 0x01010 ++ lu12i.w sevenf, 0x7f7f7 ++ andi src1_off, src1, 0x7 ++ ori zeroones, zeroones, 0x101 ++ ori sevenf, sevenf, 0xf7f ++ andi tmp1, tmp1, 0x7 ++ bstrins.d zeroones, zeroones, 63, 32 /* zeroones = 0x0101010101010101 */ ++ bstrins.d sevenf, sevenf, 63, 32 /* sevenf = 0x7f7f7f7f7f7f7f7f */ ++ bnez tmp1, strcmp_misaligned8 /* low 3 address bits of src1 and src2 differ */ ++ bnez src1_off, strcmp_mutual_align /* same low 3 bits, but not 8-byte aligned */ ++strcmp_loop_aligned: ++ ld.d data1, src1, 0 ++ addi.d src1, src1, 8 ++ ld.d data2, src2, 0 ++ addi.d src2, src2, 8 ++strcmp_start_realigned: ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ xor diff, data1, data2 ++ andn has_nul, tmp1, tmp2 /* nonzero iff data1 contains a zero byte */ ++ or syndrome, diff, has_nul ++ beqz syndrome, strcmp_loop_aligned /* no difference and no NUL: next doubleword */ ++ ++strcmp_end: ++ ctz.d pos, syndrome /* index of the lowest set syndrome bit */ ++ bstrins.d pos, zero, 2, 0 /* round pos down to a multiple of 8 */ ++ srl.d data1, data1, pos ++ srl.d data2, data2, pos ++ andi data1, data1, 0xff ++ andi data2, data2, 0xff ++ sub.d result, data1, data2 /* difference of the two selected bytes */ ++ jr ra ++strcmp_mutual_align: ++ bstrins.d src1, zero, 2, 0 /* round src1 down to 8-byte alignment */ ++ bstrins.d src2, zero, 2, 0 /* round src2 down to 8-byte alignment */ ++ slli.d tmp1, src1_off, 0x3 ++ ld.d data1, src1, 0 ++ sub.d tmp1, zero, tmp1 ++ ld.d data2, src2, 0 ++ addi.d src1, src1, 8 ++ addi.d src2, src2, 8 ++ nor tmp2, zero, zero ++ srl.d tmp2, tmp2, tmp1 /* tmp2 = low (8 * src1_off) bits set */ ++ or data1, data1, tmp2 ++ or data2, data2, tmp2 /* bytes before the original start are 0xff in both words */ ++ b strcmp_start_realigned ++ ++strcmp_misaligned8: ++ /* Check the offsets within a doubleword: ++ if ((src1_off != 0) && ((src2_off == 0) || (src1_off < src2_off))) ++ then
exchange(src1, src2). */ ++ andi src2_off, src2, 0x7 ++ slt tmp2, src1_off, src2_off ++ CONDITIONSEL(tmp2, src2_off, tmp2, tmp1) /* tmp1 is nonzero on this path */ ++ maskeqz exchange, tmp2, src1_off /* exchange != 0 iff the condition above holds */ ++ xor tmp3, src1, src2 ++ maskeqz tmp3, tmp3, exchange ++ xor src1, src1, tmp3 ++ xor src2, src2, tmp3 /* src1 and src2 are swapped when exchange != 0 */ ++ ++ andi src1_off, src1, 0x7 ++ beqz src1_off, strcmp_loop_misaligned ++strcmp_do_misaligned: ++ ld.bu data1, src1, 0 ++ ld.bu data2, src2, 0 ++ xor tmp3, data1, data2 ++ addi.d src1, src1, 1 ++ masknez tmp3, data1, tmp3 /* tmp3 = 0 if the bytes differ, else data1 */ ++ addi.d src2, src2, 1 ++ beqz tmp3, strcmp_done /* bytes differ or data1 is NUL */ ++ andi src1_off, src1, 0x7 ++ bnez src1_off, strcmp_do_misaligned /* byte-wise until src1 is 8-byte aligned */ ++ ++strcmp_loop_misaligned: ++ andi tmp1, src2, 0xff8 ++ xori tmp1, tmp1, 0xff8 ++ beqz tmp1, strcmp_do_misaligned /* bits 11:3 of src2 all set: go byte-wise; NOTE(review): presumably keeps the src2 load from crossing a 4 KiB boundary - confirm */ ++ ld.d data1, src1, 0 ++ ld.d data2, src2, 0 ++ addi.d src1, src1, 8 ++ addi.d src2, src2, 8 ++ ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ xor diff, data1, data2 ++ andn has_nul, tmp1, tmp2 /* nonzero iff data1 contains a zero byte */ ++ or syndrome, diff, has_nul ++ beqz syndrome, strcmp_loop_misaligned ++strcmp_misalign_end: ++ ctz.d pos, syndrome /* index of the lowest set syndrome bit */ ++ bstrins.d pos, zero, 2, 0 /* round pos down to a multiple of 8 */ ++ srl.d data1, data1, pos ++ srl.d data2, data2, pos ++ andi data1, data1, 0xff ++ andi data2, data2, 0xff ++ sub.d tmp1, data1, data2 ++ sub.d tmp2, data2, data1 ++ CONDITIONSEL(result, exchange, tmp2, tmp1) /* result = exchange ? data2 - data1 : data1 - data2 */ ++ jr ra ++ ++strcmp_done: ++ sub.d tmp1, data1, data2 ++ sub.d tmp2, data2, data1 ++ CONDITIONSEL(result, exchange, tmp2, tmp1) /* result = exchange ? data2 - data1 : data1 - data2 */ ++ jr ra ++END(strcmp) ++ ++libc_hidden_builtin_def (strcmp) +diff --git a/sysdeps/loongarch/lp64/strncmp.S b/sysdeps/loongarch/lp64/strncmp.S +new file mode 100644 +index 00000000..979ea40a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/strncmp.S +@@ -0,0 +1,225 @@ ++/* Optimized strncmp implementation for LoongArch. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library.
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sys/asm.h> ++ ++/* Parameters and Results */ ++#define src1 a0 ++#define src2 a1 ++#define limit a2 ++#define result v0 ++ ++ ++/* Internal variables */ ++#define data1 t0 ++#define data2 t1 ++#define has_nul t2 ++#define diff t3 ++#define syndrome t4 ++#define zeroones t5 ++#define sevenf t6 ++#define pos t7 ++#define exchange t8 ++#define tmp1 a5 ++#define tmp2 a6 ++#define tmp3 a7 ++#define src1_off a3 ++#define limit_wd a4 ++ ++LEAF(strncmp) ++ .align 4 ++ beqz limit, strncmp_ret0 /* limit == 0: return 0 */ ++ ++ xor tmp1, src1, src2 ++ lu12i.w zeroones, 0x01010 ++ lu12i.w sevenf, 0x7f7f7 ++ andi src1_off, src1, 0x7 ++ ori zeroones, zeroones, 0x101 ++ andi tmp1, tmp1, 0x7 ++ ori sevenf, sevenf, 0xf7f ++ bstrins.d zeroones, zeroones, 63, 32 /* zeroones = 0x0101010101010101 */ ++ bstrins.d sevenf, sevenf, 63, 32 /* sevenf = 0x7f7f7f7f7f7f7f7f */ ++ bnez tmp1, strncmp_misaligned8 /* low 3 address bits of src1 and src2 differ */ ++ bnez src1_off, strncmp_mutual_align /* same low 3 bits, but not 8-byte aligned */ ++ addi.d limit_wd, limit, -1 ++ srli.d limit_wd, limit_wd, 3 /* limit_wd = (limit - 1) / 8 */ ++ ++strncmp_loop_aligned: ++ ld.d data1, src1, 0 ++ addi.d src1, src1, 8 ++ ld.d data2, src2, 0 ++ addi.d src2, src2, 8 ++strncmp_start_realigned: ++ addi.d limit_wd, limit_wd, -1 ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ xor diff, data1, data2 ++ andn has_nul, tmp1, tmp2 /* nonzero iff data1 contains a zero byte */ ++ srli.d tmp1, limit_wd, 63 /* tmp1 = 1 once limit_wd has gone negative */ ++ or syndrome, diff, has_nul ++ or tmp2, syndrome, tmp1 ++ beqz tmp2,
strncmp_loop_aligned /* no difference, no NUL, limit not reached: next doubleword */ ++ ++ /* The limit has not been reached. */ ++ bge limit_wd, zero, strncmp_not_limit ++ /* The limit falls within this doubleword: ignore the bytes beyond it. */ ++ andi limit, limit, 0x7 ++ li.w tmp1, 0x8 ++ sub.d limit, tmp1, limit ++ slli.d limit, limit, 0x3 ++ li.d tmp1, -1 ++ srl.d tmp1, tmp1, limit /* tmp1 = mask of the bytes still within limit */ ++ and data1, data1, tmp1 ++ and data2, data2, tmp1 ++ orn syndrome, syndrome, tmp1 /* also set every syndrome bit outside the mask */ ++ ++ ++strncmp_not_limit: ++ ctz.d pos, syndrome /* index of the lowest set syndrome bit */ ++ bstrins.d pos, zero, 2, 0 /* round pos down to a multiple of 8 */ ++ srl.d data1, data1, pos ++ srl.d data2, data2, pos ++ andi data1, data1, 0xff ++ andi data2, data2, 0xff ++ sub.d result, data1, data2 /* difference of the two selected bytes */ ++ jr ra ++ ++ ++ ++strncmp_mutual_align: ++ bstrins.d src1, zero, 2, 0 /* round src1 down to 8-byte alignment */ ++ bstrins.d src2, zero, 2, 0 /* round src2 down to 8-byte alignment */ ++ slli.d tmp1, src1_off, 0x3 ++ ld.d data1, src1, 0 ++ ld.d data2, src2, 0 ++ addi.d src2, src2, 8 ++ addi.d src1, src1, 8 ++ ++ addi.d limit_wd, limit, -1 ++ andi tmp3, limit_wd, 0x7 ++ srli.d limit_wd, limit_wd, 3 ++ add.d limit, limit, src1_off ++ add.d tmp3, tmp3, src1_off ++ srli.d tmp3, tmp3, 3 ++ add.d limit_wd, limit_wd, tmp3 /* limit_wd = (original limit - 1 + src1_off) / 8 */ ++ ++ sub.d tmp1, zero, tmp1 ++ nor tmp2, zero, zero ++ srl.d tmp2, tmp2, tmp1 /* tmp2 = low (8 * src1_off) bits set */ ++ or data1, data1, tmp2 ++ or data2, data2, tmp2 /* bytes before the original start are 0xff in both words */ ++ b strncmp_start_realigned ++ ++strncmp_misaligned8: ++ ++ li.w tmp1, 0x10 ++ bge limit, tmp1, strncmp_try_words /* limit >= 16 (signed compare): use doubleword loads */ ++strncmp_byte_loop: ++ ld.bu data1, src1, 0 ++ ld.bu data2, src2, 0 ++ addi.d limit, limit, -1 ++ xor tmp1, data1, data2 ++ masknez tmp1, data1, tmp1 ++ maskeqz tmp1, limit, tmp1 /* tmp1 = 0 if the bytes differ, data1 is NUL, or limit is now 0 */ ++ beqz tmp1, strncmp_done ++ ++ ld.bu data1, src1, 1 ++ ld.bu data2, src2, 1 ++ addi.d src1, src1, 2 ++ addi.d src2, src2, 2 ++ addi.d limit, limit, -1 ++ xor tmp1, data1, data2 ++ masknez tmp1, data1, tmp1 ++ maskeqz tmp1, limit, tmp1 /* tmp1 = 0 if the bytes differ, data1 is NUL, or limit is now 0 */ ++ bnez tmp1, strncmp_byte_loop ++ ++ ++strncmp_done: ++ sub.d result, data1, data2 ++ jr ra ++ ++strncmp_try_words: ++ srli.d limit_wd, limit, 3 ++ beqz src1_off, strncmp_do_misaligned ++ ++ sub.d src1_off, zero, src1_off ++ andi src1_off, src1_off, 0x7 /* src1_off = bytes needed to 8-byte align src1 */ ++ sub.d limit, limit, src1_off ++ srli.d limit_wd, limit, 0x3 ++
++strncmp_page_end_loop: ++ ld.bu data1, src1, 0 ++ ld.bu data2, src2, 0 ++ addi.d src1, src1, 1 ++ addi.d src2, src2, 1 ++ xor tmp1, data1, data2 ++ masknez tmp1, data1, tmp1 /* tmp1 = 0 if the bytes differ, else data1 */ ++ beqz tmp1, strncmp_done /* bytes differ or data1 is NUL */ ++ andi tmp1, src1, 0x7 ++ bnez tmp1, strncmp_page_end_loop /* byte-wise until src1 is 8-byte aligned */ ++strncmp_do_misaligned: ++ li.w src1_off, 0x8 /* NOTE(review): src1_off does not appear to be read after this point - confirm */ ++ addi.d limit_wd, limit_wd, -1 ++ blt limit_wd, zero, strncmp_done_loop /* no full doubleword left within limit */ ++ ++strncmp_loop_misaligned: ++ andi tmp2, src2, 0xff8 ++ xori tmp2, tmp2, 0xff8 ++ beqz tmp2, strncmp_page_end_loop /* bits 11:3 of src2 all set: compare these bytes one at a time */ ++ ++ ld.d data1, src1, 0 ++ ld.d data2, src2, 0 ++ addi.d src1, src1, 8 ++ addi.d src2, src2, 8 ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ xor diff, data1, data2 ++ andn has_nul, tmp1, tmp2 /* nonzero iff data1 contains a zero byte */ ++ or syndrome, diff, has_nul ++ bnez syndrome, strncmp_not_limit ++ addi.d limit_wd, limit_wd, -1 ++ bge limit_wd, zero, strncmp_loop_misaligned ++ ++strncmp_done_loop: ++ andi limit, limit, 0x7 /* bytes left after the full doublewords */ ++ beqz limit, strncmp_not_limit ++ /* Read the last doubleword. ++ First check whether the final part is about to cross the page. */ ++ andi tmp1, src2, 0x7 ++ andi tmp2, src2, 0xff8 ++ add.d tmp1, tmp1, limit ++ xori tmp2, tmp2, 0xff8 ++ andi tmp1, tmp1, 0x8 ++ masknez tmp1, tmp1, tmp2 /* tmp1 != 0 only if bits 11:3 of src2 are all set and (src2 & 7) + limit >= 8 */ ++ bnez tmp1, strncmp_byte_loop ++ addi.d src1, src1, -8 ++ addi.d src2, src2, -8 ++ ldx.d data1, src1, limit /* the 8 bytes ending at the last byte within limit */ ++ ldx.d data2, src2, limit ++ sub.d tmp1, data1, zeroones ++ or tmp2, data1, sevenf ++ xor diff, data1, data2 ++ andn has_nul, tmp1, tmp2 /* nonzero iff data1 contains a zero byte */ ++ or syndrome, diff, has_nul ++ bnez syndrome, strncmp_not_limit ++ ++strncmp_ret0: ++ move result, zero ++ jr ra ++END(strncmp) ++libc_hidden_builtin_def (strncmp) +-- +2.33.0 + |