From 603aa93569ec4034aa1d5a310f59504b5d6aad4d Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Sun, 29 Jan 2023 10:23:06 +0800 Subject: [PATCH 3/6] LoongArch: Optimize string function memset. Change-Id: I04906c31a2eabd380b19bb3a4cab603128526cd1 --- sysdeps/loongarch/lp64/memset.S | 170 ++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 sysdeps/loongarch/lp64/memset.S diff --git a/sysdeps/loongarch/lp64/memset.S b/sysdeps/loongarch/lp64/memset.S new file mode 100644 index 00000000..261504b1 --- /dev/null +++ b/sysdeps/loongarch/lp64/memset.S @@ -0,0 +1,170 @@ +/* Optimized memset implementation for LoongArch. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include + /* NOTE(review): the header name that should follow the include directive is absent in this copy of the patch; LEAF() is not defined in this file, so it presumably comes from that header. Restore the name from the original patch. */ +#define ST_128(n) /* Emits sixteen st.d stores of a1 at a0+n, a0+n+8, ..., a0+n+120, covering 128 consecutive bytes starting at a0+n. The backslash after the last store continues the macro onto the following empty line. */ \ + st.d a1, a0, n; \ + st.d a1, a0, n+8 ; \ + st.d a1, a0, n+16 ; \ + st.d a1, a0, n+24 ; \ + st.d a1, a0, n+32 ; \ + st.d a1, a0, n+40 ; \ + st.d a1, a0, n+48 ; \ + st.d a1, a0, n+56 ; \ + st.d a1, a0, n+64 ; \ + st.d a1, a0, n+72 ; \ + st.d a1, a0, n+80 ; \ + st.d a1, a0, n+88 ; \ + st.d a1, a0, n+96 ; \ + st.d a1, a0, n+104; \ + st.d a1, a0, n+112; \ + st.d a1, a0, n+120; \ + +/* void *memset(void *s, int c, size_t n); The prologue below uses a0 as the store base, a1 as the fill value and a2 as the byte count. The three bstrins.d instructions replicate the low byte of a1 into all eight bytes of a1; t7 is set to a0 + a2 (one past the last byte to fill) and t0 receives a copy of a0 (its use lies beyond this excerpt). The count is then dispatched: below 16 branches to less_16bytes, 64 or more branches to more_64bytes, below 32 branches to less_32bytes, and counts of 32 to 63 fall through to the st.d at offset 0 that follows. */ +LEAF(memset) + .align 6 + + bstrins.d a1, a1, 15, 8 + add.d t7, a0, a2 + bstrins.d a1, a1, 31, 16 + move t0, a0 + bstrins.d a1, a1, 63, 32 + srai.d t8, a2, 4 #num/16 + beqz t8, less_16bytes #num<16 + srai.d t8, a2, 6 #num/64 + bnez t8, more_64bytes #num>64 + srai.d t8, a2, 5 #num/32 + beqz t8, less_32bytes #num<32 + st.d a1, a0, 0 #32