Diffstat (limited to 'x86_64-Optimize-ffsll-function-code-size.patch')
-rw-r--r--  x86_64-Optimize-ffsll-function-code-size.patch | 50
1 file changed, 50 insertions, 0 deletions
diff --git a/x86_64-Optimize-ffsll-function-code-size.patch b/x86_64-Optimize-ffsll-function-code-size.patch
new file mode 100644
index 0000000..e5531b5
--- /dev/null
+++ b/x86_64-Optimize-ffsll-function-code-size.patch
@@ -0,0 +1,50 @@
+From 30e546d76e756fe4d2d20a8b2286de4fbf30ceb5 Mon Sep 17 00:00:00 2001
+From: Sunil K Pandey <skpgkp2@gmail.com>
+Date: Wed, 26 Jul 2023 08:34:05 -0700
+Subject: [PATCH 1/6] x86_64: Optimize ffsll function code size.
+
+The ffsll function randomly regresses by ~20%, depending on how the
+code gets aligned in memory. The ffsll function code size is 17 bytes
+and the default function alignment is 16 bytes, so the function can
+start at an offset of 0, 16, 32, or 48 bytes within a 64-byte cache
+line. At offsets 0, 16, and 32 the entire code fits in a single cache
+line; at offset 48 it splits across two cache lines (48 + 17 > 64),
+hence the random regression.
+
+Reducing the ffsll function from 17 bytes to 12 bytes ensures that it
+always fits in a single 64-byte cache line, even at offset 48.
+
+This patch fixes the random performance regression of ffsll.
+
+Reviewed-by: Carlos O'Donell <carlos@redhat.com>
+(cherry picked from commit 9d94997b5f9445afd4f2bccc5fa60ff7c4361ec1)
+---
+ sysdeps/x86_64/ffsll.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c
+index a1c13d4906..0c6680735c 100644
+--- a/sysdeps/x86_64/ffsll.c
++++ b/sysdeps/x86_64/ffsll.c
+@@ -26,13 +26,13 @@ int
+ ffsll (long long int x)
+ {
+ long long int cnt;
+- long long int tmp;
+
+- asm ("bsfq %2,%0\n" /* Count low bits in X and store in %1. */
+- "cmoveq %1,%0\n" /* If number was zero, use -1 as result. */
+- : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1));
++ asm ("mov $-1,%k0\n" /* Initialize cnt to -1. */
++ "bsf %1,%0\n" /* Count low bits in x and store in cnt. */
++ "inc %k0\n" /* Increment cnt by 1. */
++ : "=&r" (cnt) : "r" (x));
+
+- return cnt + 1;
++ return cnt;
+ }
+
+ #ifndef __ILP32__
+--
+2.33.0
+
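
For reference, here is a minimal standalone sketch of the new branchless
sequence from the hunk above, checked against GCC's __builtin_ffsll. The
function name my_ffsll and the test harness are illustrative additions,
not glibc code; the sketch assumes GCC or Clang on x86-64, where bsf with
a zero source operand leaves the destination register unchanged (the
behavior the new sequence relies on).

    /* Sketch of the patched branchless ffsll; my_ffsll is a
       hypothetical name, not the glibc symbol.
       Build: gcc -O2 ffsll-demo.c && ./a.out  */
    #include <assert.h>
    #include <stdio.h>

    static int
    my_ffsll (long long int x)
    {
      long long int cnt;

      asm ("mov $-1,%k0\n" /* cnt = -1; a mov to the 32-bit register
                              also zeroes the upper half.  */
           "bsf %1,%0\n"   /* cnt = index of lowest set bit in x; if
                              x == 0, bsf leaves cnt untouched at -1.  */
           "inc %k0\n"     /* Map -1 -> 0 and bit index i -> i + 1.  */
           : "=&r" (cnt) : "r" (x));

      return cnt;
    }

    int
    main (void)
    {
      assert (my_ffsll (0) == 0);   /* no bit set */
      assert (my_ffsll (1) == 1);   /* bit 0 */
      assert (my_ffsll (48) == 5);  /* lowest set bit is bit 4 */
      for (int i = 0; i < 64; i++)  /* compare against the builtin */
        assert (my_ffsll ((long long int) (1ULL << i))
                == __builtin_ffsll ((long long int) (1ULL << i)));
      puts ("all ffsll checks passed");
      return 0;
    }

The size saving comes from dropping the cmove and the second register
(tmp): -1 is loaded directly into the result register up front, and
bsf's leave-unchanged behavior on zero input makes the conditional move
and the separate cnt + 1 in the return unnecessary.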