author     CoprDistGit <infra@openeuler.org>  2024-08-03 06:28:41 +0000
committer  CoprDistGit <infra@openeuler.org>  2024-08-03 06:28:41 +0000
commit     d20db0561a6a36f914fde030512503b114ef9a0c (patch)
tree       d4e5e3494d95c269a1cee6195f11bf3201bcadbf /4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch
parent     016343d99b1b269d7246ef1e143d4b54914433d4 (diff)
Diffstat (limited to '4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch')
-rw-r--r--  4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch  51
1 file changed, 51 insertions, 0 deletions
diff --git a/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch b/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch
new file mode 100644
index 0000000..fd17671
--- /dev/null
+++ b/4-5-AArch64-Improve-A64FX-memset-by-removing-unroll3.patch
@@ -0,0 +1,51 @@
+From e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wdijkstr@arm.com>
+Date: Tue, 10 Aug 2021 13:44:27 +0100
+Subject: [PATCH] [4/5] AArch64: Improve A64FX memset by removing unroll32
+
+Remove unroll32 code since it doesn't improve performance.
+
+Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com>
+---
+ sysdeps/aarch64/multiarch/memset_a64fx.S | 18 +-----------------
+ 1 file changed, 1 insertion(+), 17 deletions(-)
+
+diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
+index 337c86b..ef03156 100644
+--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
+@@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic
+ ccmp vector_length, tmp1, 0, cs
+ b.eq L(L1_prefetch)
+
+-L(unroll32):
+- lsl tmp1, vector_length, 3 // vector_length * 8
+- lsl tmp2, vector_length, 5 // vector_length * 32
+- .p2align 3
+-1: cmp rest, tmp2
+- b.cc L(unroll8)
+- st1b_unroll
+- add dst, dst, tmp1
+- st1b_unroll
+- add dst, dst, tmp1
+- st1b_unroll
+- add dst, dst, tmp1
+- st1b_unroll
+- add dst, dst, tmp1
+- sub rest, rest, tmp2
+- b 1b
+
+ L(unroll8):
+ lsl tmp1, vector_length, 3
+@@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE
+ sub rest, rest, CACHE_LINE_SIZE * 2
+ cmp rest, L1_SIZE
+ b.ge 1b
+- cbnz rest, L(unroll32)
++ cbnz rest, L(unroll8)
+ ret
+
+ // count >= L2_SIZE
+--
+1.8.3.1
+
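
Note on the change above: the removed L(unroll32) block wrote vector_length * 32 bytes per iteration (four st1b_unroll groups) while rest >= vector_length * 32, then fell through to L(unroll8), which writes vector_length * 8 bytes per iteration. After the patch, sizes below the L1 prefetch threshold go straight to the 8-vector loop. The following C sketch only illustrates the remaining control flow; store_vectors(), memset_tail_sketch(), and the vl parameter are placeholders chosen here, not glibc identifiers (the real code is SVE assembly).

#include <stddef.h>
#include <string.h>

/* Placeholder for a group of eight SVE stores (the st1b_unroll macro in
   memset_a64fx.S); plain memset stands in for the vector stores here. */
static void store_vectors(unsigned char *dst, unsigned char c, size_t n)
{
    memset(dst, c, n);
}

/* Sketch of the loop structure left after this patch: only the 8-vector
   path remains.  Before the patch, an extra L(unroll32) loop first wrote
   vl * 32 bytes per iteration (four 8-vector groups) while rest >= vl * 32,
   then fell through to this loop. */
static void memset_tail_sketch(unsigned char *dst, unsigned char c,
                               size_t rest, size_t vl)
{
    size_t step = vl * 8;               /* bytes per unroll8 iteration */

    while (rest >= step) {
        store_vectors(dst, c, step);
        dst += step;
        rest -= step;
    }
    if (rest)
        store_vectors(dst, c, rest);    /* leftover partial block */
}

int main(void)
{
    unsigned char buf[4096];

    /* 64 bytes is the SVE vector length on A64FX (512-bit SVE). */
    memset_tail_sketch(buf, 0xab, sizeof buf, 64);
    return 0;
}

The upstream rationale is only that the deeper 32-vector unroll showed no measurable gain; the sketch shows what remains once it is gone.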