diff options
Diffstat (limited to 'x86_64-Optimize-ffsll-function-code-size.patch')
-rw-r--r-- | x86_64-Optimize-ffsll-function-code-size.patch | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/x86_64-Optimize-ffsll-function-code-size.patch b/x86_64-Optimize-ffsll-function-code-size.patch new file mode 100644 index 0000000..e5531b5 --- /dev/null +++ b/x86_64-Optimize-ffsll-function-code-size.patch @@ -0,0 +1,50 @@ +From 30e546d76e756fe4d2d20a8b2286de4fbf30ceb5 Mon Sep 17 00:00:00 2001 +From: Sunil K Pandey <skpgkp2@gmail.com> +Date: Wed, 26 Jul 2023 08:34:05 -0700 +Subject: [PATCH 1/6] x86_64: Optimize ffsll function code size. + +Ffsll function randomly regress by ~20%, depending on how code gets +aligned in memory. Ffsll function code size is 17 bytes. Since default +function alignment is 16 bytes, it can load on 16, 32, 48 or 64 bytes +aligned memory. When ffsll function load at 16, 32 or 64 bytes aligned +memory, entire code fits in single 64 bytes cache line. When ffsll +function load at 48 bytes aligned memory, it splits in two cache line, +hence random regression. + +Ffsll function size reduction from 17 bytes to 12 bytes ensures that it +will always fit in single 64 bytes cache line. + +This patch fixes ffsll function random performance regression. + +Reviewed-by: Carlos O'Donell <carlos@redhat.com> +(cherry picked from commit 9d94997b5f9445afd4f2bccc5fa60ff7c4361ec1) +--- + sysdeps/x86_64/ffsll.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c +index a1c13d4906..0c6680735c 100644 +--- a/sysdeps/x86_64/ffsll.c ++++ b/sysdeps/x86_64/ffsll.c +@@ -26,13 +26,13 @@ int + ffsll (long long int x) + { + long long int cnt; +- long long int tmp; + +- asm ("bsfq %2,%0\n" /* Count low bits in X and store in %1. */ +- "cmoveq %1,%0\n" /* If number was zero, use -1 as result. */ +- : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1)); ++ asm ("mov $-1,%k0\n" /* Initialize cnt to -1. */ ++ "bsf %1,%0\n" /* Count low bits in x and store in cnt. */ ++ "inc %k0\n" /* Increment cnt by 1. */ ++ : "=&r" (cnt) : "r" (x)); + +- return cnt + 1; ++ return cnt; + } + + #ifndef __ILP32__ +-- +2.33.0 + |