diff options
author | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-08-03 06:28:41 +0000 |
commit | d20db0561a6a36f914fde030512503b114ef9a0c (patch) | |
tree | d4e5e3494d95c269a1cee6195f11bf3201bcadbf /4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch | |
parent | 016343d99b1b269d7246ef1e143d4b54914433d4 (diff) |
automatic import of glibcopeneuler22.03_LTS_SP4openeuler22.03_LTS_SP3openeuler20.03
Diffstat (limited to '4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch')
-rw-r--r-- | 4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch b/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch new file mode 100644 index 0000000..fd17671 --- /dev/null +++ b/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch @@ -0,0 +1,51 @@ +From e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wdijkstr@arm.com> +Date: Tue, 10 Aug 2021 13:44:27 +0100 +Subject: [PATCH] [4/5] AArch64: Improve A64FX memset by removing unroll32 + +Remove unroll32 code since it doesn't improve performance. + +Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com> +--- + sysdeps/aarch64/multiarch/memset_a64fx.S | 18 +----------------- + 1 file changed, 1 insertion(+), 17 deletions(-) + +diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S +index 337c86b..ef03156 100644 +--- a/sysdeps/aarch64/multiarch/memset_a64fx.S ++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S +@@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic + ccmp vector_length, tmp1, 0, cs + b.eq L(L1_prefetch) + +-L(unroll32): +- lsl tmp1, vector_length, 3 // vector_length * 8 +- lsl tmp2, vector_length, 5 // vector_length * 32 +- .p2align 3 +-1: cmp rest, tmp2 +- b.cc L(unroll8) +- st1b_unroll +- add dst, dst, tmp1 +- st1b_unroll +- add dst, dst, tmp1 +- st1b_unroll +- add dst, dst, tmp1 +- st1b_unroll +- add dst, dst, tmp1 +- sub rest, rest, tmp2 +- b 1b + + L(unroll8): + lsl tmp1, vector_length, 3 +@@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE + sub rest, rest, CACHE_LINE_SIZE * 2 + cmp rest, L1_SIZE + b.ge 1b +- cbnz rest, L(unroll32) ++ cbnz rest, L(unroll8) + ret + + // count >= L2_SIZE +-- +1.8.3.1 + |