summary | refs | log | tree | commit | diff
path: root/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch
diff options
context:
space:
mode:
author: CoprDistGit <infra@openeuler.org> 2024-08-03 06:28:41 +0000
committer: CoprDistGit <infra@openeuler.org> 2024-08-03 06:28:41 +0000
commit: d20db0561a6a36f914fde030512503b114ef9a0c (patch)
tree: d4e5e3494d95c269a1cee6195f11bf3201bcadbf /4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch
parent: 016343d99b1b269d7246ef1e143d4b54914433d4 (diff)
Diffstat (limited to '4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch')
-rw-r--r-- 4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch 414
1 files changed, 414 insertions, 0 deletions
diff --git a/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch b/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch
new file mode 100644
index 0000000..5e0ce7d
--- /dev/null
+++ b/4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch
@@ -0,0 +1,414 @@
+From 3f3b70e39a529369e4b2936f35034215a45436a3 Mon Sep 17 00:00:00 2001
+From: Xue Liu <liuxue@loongson.cn>
+Date: Sun, 29 Jan 2023 10:23:50 +0800
+Subject: [PATCH 4/6] LoongArch: Optimize string functions strcmp, strncmp.
+
+Change-Id: I436138a312e8ebb668223cafef84fd74dcde72fd
+---
+ sysdeps/loongarch/lp64/strcmp.S | 161 ++++++++++++++++++++++
+ sysdeps/loongarch/lp64/strncmp.S | 225 +++++++++++++++++++++++++++++++
+ 2 files changed, 386 insertions(+)
+ create mode 100644 sysdeps/loongarch/lp64/strcmp.S
+ create mode 100644 sysdeps/loongarch/lp64/strncmp.S
+
+diff --git a/sysdeps/loongarch/lp64/strcmp.S b/sysdeps/loongarch/lp64/strcmp.S
+new file mode 100644
+index 00000000..0f7a6d55
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strcmp.S
+@@ -0,0 +1,161 @@
++/* Optimized strcmp implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/asm.h>
++
++/* Parameters and Results */
++#define src1 a0 /* base address for the data1 loads; may be swapped with src2 */
++#define src2 a1 /* base address for the data2 loads */
++#define result v0 /* return value */
++
++/* Internal variable */
++#define src1_off a2 /* src1 & 7 */
++#define src2_off a3 /* src2 & 7 */
++#define data1 t0 /* byte or 8-byte word loaded from src1 */
++#define data2 t1 /* byte or 8-byte word loaded from src2 */
++#define has_nul t2 /* non-zero iff data1 contains a zero byte */
++#define diff t3 /* data1 ^ data2 */
++#define syndrome t4 /* diff | has_nul */
++#define zeroones t5 /* 0x0101010101010101 */
++#define sevenf t6 /* 0x7f7f7f7f7f7f7f7f */
++#define pos t7 /* bit offset of the byte that decides the result */
++#define exchange t8 /* non-zero when src1 and src2 have been swapped */
++#define tmp1 a4
++#define tmp2 a5
++#define tmp3 a6 /* also clobbered by CONDITIONSEL */
++#define tmp4 a7 /* not referenced in strcmp.S */
++
++/* rd = (rc != 0) ? ra : rb.  tmp3 is written first as scratch, so
++ ra, rc and rd must not be tmp3; rb is read before tmp3 is written. */
++#define CONDITIONSEL(rd, rc, ra, rb)\
++ masknez tmp3, rb, rc;\
++ maskeqz rd, ra, rc;\
++ or rd, rd, tmp3
++
++LEAF(strcmp) /* Compare the strings at src1 and src2; result = difference of the two zero-extended bytes at the first mismatch or NUL. */
++ .align 4
++
++ xor tmp1, src1, src2
++ lu12i.w zeroones, 0x01010
++ lu12i.w sevenf, 0x7f7f7
++ andi src1_off, src1, 0x7 /* src1_off = src1 & 7 */
++ ori zeroones, zeroones, 0x101 /* low 32 bits: 0x01010101 */
++ ori sevenf, sevenf, 0xf7f /* low 32 bits: 0x7f7f7f7f */
++ andi tmp1, tmp1, 0x7 /* non-zero iff src1 and src2 differ in their low 3 bits */
++ bstrins.d zeroones, zeroones, 63, 32 /* copy bits 31..0 into bits 63..32 */
++ bstrins.d sevenf, sevenf, 63, 32
++ bnez tmp1, strcmp_misaligned8 /* different offsets within an 8-byte word */
++ bnez src1_off, strcmp_mutual_align /* same offset, but not 8-byte aligned */
++strcmp_loop_aligned: /* both pointers 8-byte aligned: compare one word per iteration */
++ ld.d data1, src1, 0
++ addi.d src1, src1, 8
++ ld.d data2, src2, 0
++ addi.d src2, src2, 8
++strcmp_start_realigned:
++ sub.d tmp1, data1, zeroones
++ or tmp2, data1, sevenf
++ xor diff, data1, data2 /* non-zero bits mark differing bytes */
++ andn has_nul, tmp1, tmp2 /* (data1 - 0x01..01) & ~(data1 | 0x7f..7f): non-zero iff data1 has a zero byte */
++ or syndrome, diff, has_nul
++ beqz syndrome, strcmp_loop_aligned /* no mismatch and no NUL: next word */
++
++strcmp_end:
++ ctz.d pos, syndrome /* index of the lowest set bit */
++ bstrins.d pos, zero, 2, 0 /* clear bits 2..0: bit offset of the lowest flagged byte */
++ srl.d data1, data1, pos /* move that byte down to bits 7..0 */
++ srl.d data2, data2, pos
++ andi data1, data1, 0xff
++ andi data2, data2, 0xff
++ sub.d result, data1, data2
++ jr ra
++strcmp_mutual_align: /* equal non-zero offsets: start from the enclosing aligned words */
++ bstrins.d src1, zero, 2, 0 /* round src1 down to a multiple of 8 */
++ bstrins.d src2, zero, 2, 0
++ slli.d tmp1, src1_off, 0x3 /* offset in bits */
++ ld.d data1, src1, 0
++ sub.d tmp1, zero, tmp1 /* srl.d uses only the low 6 bits: shift count 64 - 8 * src1_off */
++ ld.d data2, src2, 0
++ addi.d src1, src1, 8
++ addi.d src2, src2, 8
++ nor tmp2, zero, zero /* all ones */
++ srl.d tmp2, tmp2, tmp1 /* ones in the low 8 * src1_off bits */
++ or data1, data1, tmp2 /* bytes below the start offset become 0xff in both words: */
++ or data2, data2, tmp2 /* they compare equal and are never seen as NUL */
++ b strcmp_start_realigned
++
++strcmp_misaligned8:
++ /* exchange is non-zero when src1_off != 0 and either
++ src2_off == 0 or src1_off < src2_off; src1 and src2 are then
++ swapped and the returned difference is taken as data2 - data1. */
++ andi src2_off, src2, 0x7
++ slt tmp2, src1_off, src2_off
++ CONDITIONSEL(tmp2, src2_off, tmp2, tmp1) /* tmp2 = src2_off ? tmp2 : tmp1 (tmp1 is non-zero on this path) */
++ maskeqz exchange, tmp2, src1_off /* exchange = src1_off ? tmp2 : 0 */
++ xor tmp3, src1, src2
++ maskeqz tmp3, tmp3, exchange /* tmp3 = exchange ? src1 ^ src2 : 0 */
++ xor src1, src1, tmp3 /* swaps the two pointers when tmp3 != 0 */
++ xor src2, src2, tmp3
++
++ andi src1_off, src1, 0x7 /* recompute for the possibly swapped src1 */
++ beqz src1_off, strcmp_loop_misaligned
++strcmp_do_misaligned: /* byte-wise compare until src1 is 8-byte aligned */
++ ld.bu data1, src1, 0
++ ld.bu data2, src2, 0
++ xor tmp3, data1, data2
++ addi.d src1, src1, 1
++ masknez tmp3, data1, tmp3 /* tmp3 = bytes equal ? data1 : 0 */
++ addi.d src2, src2, 1
++ beqz tmp3, strcmp_done /* bytes differ, or data1 is NUL */
++ andi src1_off, src1, 0x7
++ bnez src1_off, strcmp_do_misaligned
++
++strcmp_loop_misaligned: /* src1 aligned, src2 not: one word per iteration */
++ andi tmp1, src2, 0xff8
++ xori tmp1, tmp1, 0xff8 /* zero iff bits 11..3 of src2 are all ones */
++ beqz tmp1, strcmp_do_misaligned /* the 8-byte load from src2 could cross a 4 KiB boundary: do this word byte-wise */
++ ld.d data1, src1, 0
++ ld.d data2, src2, 0
++ addi.d src1, src1, 8
++ addi.d src2, src2, 8
++
++ sub.d tmp1, data1, zeroones /* same NUL/difference test as strcmp_start_realigned */
++ or tmp2, data1, sevenf
++ xor diff, data1, data2
++ andn has_nul, tmp1, tmp2
++ or syndrome, diff, has_nul
++ beqz syndrome, strcmp_loop_misaligned
++strcmp_misalign_end: /* as strcmp_end, but honours a pointer swap */
++ ctz.d pos, syndrome
++ bstrins.d pos, zero, 2, 0
++ srl.d data1, data1, pos
++ srl.d data2, data2, pos
++ andi data1, data1, 0xff
++ andi data2, data2, 0xff
++ sub.d tmp1, data1, data2
++ sub.d tmp2, data2, data1
++ CONDITIONSEL(result, exchange, tmp2, tmp1) /* result = exchange ? data2 - data1 : data1 - data2 */
++ jr ra
++
++strcmp_done: /* data1 and data2 hold the deciding bytes from the byte loop */
++ sub.d tmp1, data1, data2
++ sub.d tmp2, data2, data1
++ CONDITIONSEL(result, exchange, tmp2, tmp1) /* result = exchange ? data2 - data1 : data1 - data2 */
++ jr ra
++END(strcmp)
++
++libc_hidden_builtin_def (strcmp)
+diff --git a/sysdeps/loongarch/lp64/strncmp.S b/sysdeps/loongarch/lp64/strncmp.S
+new file mode 100644
+index 00000000..979ea40a
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/strncmp.S
+@@ -0,0 +1,225 @@
++/* Optimized strncmp implementation for LoongArch.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/asm.h>
++
++/* Parameters and Results */
++#define src1 a0 /* base address for the data1 loads */
++#define src2 a1 /* base address for the data2 loads */
++#define limit a2 /* byte count; later reduced to a sub-word count and reused as a shift amount */
++#define result v0 /* return value */
++
++
++/* Internal variable */
++#define data1 t0 /* byte or 8-byte word loaded from src1 */
++#define data2 t1 /* byte or 8-byte word loaded from src2 */
++#define has_nul t2 /* non-zero iff data1 contains a zero byte */
++#define diff t3 /* data1 ^ data2 */
++#define syndrome t4 /* diff | has_nul */
++#define zeroones t5 /* 0x0101010101010101 */
++#define sevenf t6 /* 0x7f7f7f7f7f7f7f7f */
++#define pos t7 /* bit offset of the byte that decides the result */
++#define exchange t8 /* not referenced in strncmp.S */
++#define tmp1 a5
++#define tmp2 a6
++#define tmp3 a7
++#define src1_off a3 /* src1 & 7 */
++#define limit_wd a4 /* word counter derived from limit; the word loops stop once it is negative */
++
++LEAF(strncmp) /* Compare at most limit bytes of the strings at src1 and src2; result = difference of the two zero-extended deciding bytes, or 0. */
++ .align 4
++ beqz limit, strncmp_ret0 /* nothing to compare */
++
++ xor tmp1, src1, src2
++ lu12i.w zeroones, 0x01010
++ lu12i.w sevenf, 0x7f7f7
++ andi src1_off, src1, 0x7 /* src1_off = src1 & 7 */
++ ori zeroones, zeroones, 0x101 /* low 32 bits: 0x01010101 */
++ andi tmp1, tmp1, 0x7 /* non-zero iff src1 and src2 differ in their low 3 bits */
++ ori sevenf, sevenf, 0xf7f /* low 32 bits: 0x7f7f7f7f */
++ bstrins.d zeroones, zeroones, 63, 32 /* copy bits 31..0 into bits 63..32 */
++ bstrins.d sevenf, sevenf, 63, 32
++ bnez tmp1, strncmp_misaligned8 /* different offsets within an 8-byte word */
++ bnez src1_off, strncmp_mutual_align /* same offset, but not 8-byte aligned */
++ addi.d limit_wd, limit, -1
++ srli.d limit_wd, limit_wd, 3 /* limit_wd = (limit - 1) / 8: index of the word holding the last byte */
++
++strncmp_loop_aligned: /* both pointers 8-byte aligned: compare one word per iteration */
++ ld.d data1, src1, 0
++ addi.d src1, src1, 8
++ ld.d data2, src2, 0
++ addi.d src2, src2, 8
++strncmp_start_realigned:
++ addi.d limit_wd, limit_wd, -1 /* becomes negative on the word that contains the limit */
++ sub.d tmp1, data1, zeroones
++ or tmp2, data1, sevenf
++ xor diff, data1, data2 /* non-zero bits mark differing bytes */
++ andn has_nul, tmp1, tmp2 /* (data1 - 0x01..01) & ~(data1 | 0x7f..7f): non-zero iff data1 has a zero byte */
++ srli.d tmp1, limit_wd, 63 /* 1 iff limit_wd is negative */
++ or syndrome, diff, has_nul
++ or tmp2, syndrome, tmp1
++ beqz tmp2, strncmp_loop_aligned /* no mismatch, no NUL, limit not reached */
++
++ /* Limit not reached: the whole word counts. */
++ bge limit_wd, zero, strncmp_not_limit
++ /* Limit reached: ignore the bytes of this word that lie past it. */
++ andi limit, limit, 0x7 /* bytes of this word inside the limit; 0 means all 8 */
++ li.w tmp1, 0x8
++ sub.d limit, tmp1, limit
++ slli.d limit, limit, 0x3 /* bits to discard; 64 acts as 0 because srl.d uses the low 6 bits */
++ li.d tmp1, -1
++ srl.d tmp1, tmp1, limit /* ones over the bytes inside the limit */
++ and data1, data1, tmp1
++ and data2, data2, tmp1
++ orn syndrome, syndrome, tmp1 /* flag the bytes past the limit; both data words are zero there */
++
++
++strncmp_not_limit:
++ ctz.d pos, syndrome /* index of the lowest set bit (64 when syndrome is 0) */
++ bstrins.d pos, zero, 2, 0 /* clear bits 2..0: bit offset of the lowest flagged byte */
++ srl.d data1, data1, pos /* move that byte down to bits 7..0 */
++ srl.d data2, data2, pos
++ andi data1, data1, 0xff
++ andi data2, data2, 0xff
++ sub.d result, data1, data2
++ jr ra
++
++
++
++strncmp_mutual_align: /* equal non-zero offsets: start from the enclosing aligned words */
++ bstrins.d src1, zero, 2, 0 /* round src1 down to a multiple of 8 */
++ bstrins.d src2, zero, 2, 0
++ slli.d tmp1, src1_off, 0x3 /* offset in bits */
++ ld.d data1, src1, 0
++ ld.d data2, src2, 0
++ addi.d src2, src2, 8
++ addi.d src1, src1, 8
++
++ addi.d limit_wd, limit, -1
++ andi tmp3, limit_wd, 0x7
++ srli.d limit_wd, limit_wd, 3
++ add.d limit, limit, src1_off /* count the limit from the rounded-down address */
++ add.d tmp3, tmp3, src1_off
++ srli.d tmp3, tmp3, 3 /* 1 iff the start offset pushes the last byte into one more word */
++ add.d limit_wd, limit_wd, tmp3
++
++ sub.d tmp1, zero, tmp1 /* srl.d uses only the low 6 bits: shift count 64 - 8 * src1_off */
++ nor tmp2, zero, zero /* all ones */
++ srl.d tmp2, tmp2, tmp1 /* ones in the low 8 * src1_off bits */
++ or data1, data1, tmp2 /* bytes below the start offset become 0xff in both words: */
++ or data2, data2, tmp2 /* they compare equal and are never seen as NUL */
++ b strncmp_start_realigned
++
++strncmp_misaligned8:
++ /* limit is an unsigned byte count, so compare it unsigned: a signed test sends limits >= 2^63 (e.g. SIZE_MAX) to the byte loop. */
++ li.w tmp1, 0x10
++ bgeu limit, tmp1, strncmp_try_words
++strncmp_byte_loop: /* two bytes per iteration */
++ ld.bu data1, src1, 0
++ ld.bu data2, src2, 0
++ addi.d limit, limit, -1
++ xor tmp1, data1, data2
++ masknez tmp1, data1, tmp1 /* tmp1 = bytes equal ? data1 : 0 */
++ maskeqz tmp1, limit, tmp1 /* tmp1 = tmp1 ? limit : 0 */
++ beqz tmp1, strncmp_done /* bytes differ, data1 is NUL, or limit exhausted */
++
++ ld.bu data1, src1, 1
++ ld.bu data2, src2, 1
++ addi.d src1, src1, 2
++ addi.d src2, src2, 2
++ addi.d limit, limit, -1
++ xor tmp1, data1, data2
++ masknez tmp1, data1, tmp1
++ maskeqz tmp1, limit, tmp1
++ bnez tmp1, strncmp_byte_loop
++
++
++strncmp_done: /* data1 and data2 hold the deciding bytes */
++ sub.d result, data1, data2
++ jr ra
++
++strncmp_try_words:
++ srli.d limit_wd, limit, 3 /* number of whole 8-byte words inside the limit */
++ beqz src1_off, strncmp_do_misaligned
++
++ sub.d src1_off, zero, src1_off
++ andi src1_off, src1_off, 0x7 /* bytes needed to bring src1 to an 8-byte boundary */
++ sub.d limit, limit, src1_off /* those bytes are handled by the byte loop below */
++ srli.d limit_wd, limit, 0x3
++
++strncmp_page_end_loop: /* byte-wise compare until src1 is 8-byte aligned */
++ ld.bu data1, src1, 0
++ ld.bu data2, src2, 0
++ addi.d src1, src1, 1
++ addi.d src2, src2, 1
++ xor tmp1, data1, data2
++ masknez tmp1, data1, tmp1 /* tmp1 = bytes equal ? data1 : 0 */
++ beqz tmp1, strncmp_done /* bytes differ, or data1 is NUL */
++ andi tmp1, src1, 0x7
++ bnez tmp1, strncmp_page_end_loop
++strncmp_do_misaligned:
++ li.w src1_off, 0x8 /* src1_off is not read again after this point */
++ addi.d limit_wd, limit_wd, -1
++ blt limit_wd, zero, strncmp_done_loop /* no whole word left */
++
++strncmp_loop_misaligned: /* src1 aligned, src2 not: one word per iteration */
++ andi tmp2, src2, 0xff8
++ xori tmp2, tmp2, 0xff8 /* zero iff bits 11..3 of src2 are all ones */
++ beqz tmp2, strncmp_page_end_loop /* the 8-byte load from src2 could cross a 4 KiB boundary: do this word byte-wise */
++
++ ld.d data1, src1, 0
++ ld.d data2, src2, 0
++ addi.d src1, src1, 8
++ addi.d src2, src2, 8
++ sub.d tmp1, data1, zeroones /* same NUL/difference test as strncmp_start_realigned */
++ or tmp2, data1, sevenf
++ xor diff, data1, data2
++ andn has_nul, tmp1, tmp2
++ or syndrome, diff, has_nul
++ bnez syndrome, strncmp_not_limit
++ addi.d limit_wd, limit_wd, -1
++ bge limit_wd, zero, strncmp_loop_misaligned
++
++strncmp_done_loop: /* all whole words compared equal; limit & 7 bytes remain */
++ andi limit, limit, 0x7
++ beqz limit, strncmp_not_limit /* data1 == data2 on every path reaching here, so this returns 0 */
++ /* Read the last double word
++ check if the final part is about to exceed the page */
++ andi tmp1, src2, 0x7
++ andi tmp2, src2, 0xff8
++ add.d tmp1, tmp1, limit
++ xori tmp2, tmp2, 0xff8 /* zero iff bits 11..3 of src2 are all ones */
++ andi tmp1, tmp1, 0x8 /* non-zero iff (src2 & 7) + limit >= 8 */
++ masknez tmp1, tmp1, tmp2 /* keep tmp1 only when tmp2 == 0 */
++ bnez tmp1, strncmp_byte_loop /* finish the remaining bytes one at a time */
++ addi.d src1, src1, -8
++ addi.d src2, src2, -8
++ ldx.d data1, src1, limit /* the 8 bytes ending at the limit; overlaps bytes already found equal */
++ ldx.d data2, src2, limit
++ sub.d tmp1, data1, zeroones
++ or tmp2, data1, sevenf
++ xor diff, data1, data2
++ andn has_nul, tmp1, tmp2
++ or syndrome, diff, has_nul
++ bnez syndrome, strncmp_not_limit
++
++strncmp_ret0:
++ move result, zero
++ jr ra
++END(strncmp)
++libc_hidden_builtin_def (strncmp)
+--
+2.33.0
+