Diffstat:
 0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch | 286 ++++++++++++++++++++++++++
 1 file changed, 286 insertions(+)
diff --git a/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch b/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
new file mode 100644
index 0000000..cb4f03e
--- /dev/null
+++ b/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
@@ -0,0 +1,286 @@
+From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001
+From: Sajan Karumanchi <sajan.karumanchi@amd.com>
+Date: Tue, 1 Aug 2023 15:20:55 +0000
+Subject: [PATCH 01/12] x86: Fix for cache computation on AMD legacy cpus.
+
+Some legacy AMD CPUs and hypervisors return all zeroes for CPUID
+leaf '0x8000_001D', resulting in zeroed-out computed cache values.
+This patch reintroduces the old way of cache computation as a
+fail-safe option to handle these cases.
+It also fixes the 'level4_cache_size' value through handle_amd().
+
+Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
+Tested-by: Florian Weimer <fweimer@redhat.com>
+---
+ sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++-----
+ 1 file changed, 199 insertions(+), 27 deletions(-)
+
+diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
+index cd4d0351ae..285773039f 100644
+--- a/sysdeps/x86/dl-cacheinfo.h
++++ b/sysdeps/x86/dl-cacheinfo.h
+@@ -315,40 +315,206 @@ handle_amd (int name)
+ {
+ unsigned int eax;
+ unsigned int ebx;
+- unsigned int ecx;
++ unsigned int ecx = 0;
+ unsigned int edx;
+- unsigned int count = 0x1;
++ unsigned int max_cpuid = 0;
++ unsigned int fn = 0;
+
+ /* No level 4 cache (yet). */
+ if (name > _SC_LEVEL3_CACHE_LINESIZE)
+ return 0;
+
+- if (name >= _SC_LEVEL3_CACHE_SIZE)
+- count = 0x3;
+- else if (name >= _SC_LEVEL2_CACHE_SIZE)
+- count = 0x2;
+- else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+- count = 0x0;
++ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);
++
++ if (max_cpuid >= 0x8000001D)
++ /* Use __cpuid__ '0x8000_001D' to compute cache details. */
++ {
++ unsigned int count = 0x1;
++
++ if (name >= _SC_LEVEL3_CACHE_SIZE)
++ count = 0x3;
++ else if (name >= _SC_LEVEL2_CACHE_SIZE)
++ count = 0x2;
++ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
++ count = 0x0;
++
++ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
++
++ if (ecx != 0)
++ {
++ switch (name)
++ {
++ case _SC_LEVEL1_ICACHE_ASSOC:
++ case _SC_LEVEL1_DCACHE_ASSOC:
++ case _SC_LEVEL2_CACHE_ASSOC:
++ case _SC_LEVEL3_CACHE_ASSOC:
++ return ((ebx >> 22) & 0x3ff) + 1;
++ case _SC_LEVEL1_ICACHE_LINESIZE:
++ case _SC_LEVEL1_DCACHE_LINESIZE:
++ case _SC_LEVEL2_CACHE_LINESIZE:
++ case _SC_LEVEL3_CACHE_LINESIZE:
++ return (ebx & 0xfff) + 1;
++ case _SC_LEVEL1_ICACHE_SIZE:
++ case _SC_LEVEL1_DCACHE_SIZE:
++ case _SC_LEVEL2_CACHE_SIZE:
++ case _SC_LEVEL3_CACHE_SIZE:
++ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1);
++ default:
++ __builtin_unreachable ();
++ }
++ return -1;
++ }
++ }
++
++ /* Legacy cache computation for CPUs prior to Bulldozer family.
++ This is also a fail-safe mechanism for some hypervisors that
++ accidentally configure __cpuid__ '0x8000_001D' to Zero. */
+
+- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
++ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
++
++ if (max_cpuid < fn)
++ return 0;
++
++ __cpuid (fn, eax, ebx, ecx, edx);
++
++ if (name < _SC_LEVEL1_DCACHE_SIZE)
++ {
++ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
++ ecx = edx;
++ }
+
+ switch (name)
+ {
+- case _SC_LEVEL1_ICACHE_ASSOC:
+- case _SC_LEVEL1_DCACHE_ASSOC:
+- case _SC_LEVEL2_CACHE_ASSOC:
++ case _SC_LEVEL1_DCACHE_SIZE:
++ return (ecx >> 14) & 0x3fc00;
++
++ case _SC_LEVEL1_DCACHE_ASSOC:
++ ecx >>= 16;
++ if ((ecx & 0xff) == 0xff)
++ {
++ /* Fully associative. */
++ return (ecx << 2) & 0x3fc00;
++ }
++ return ecx & 0xff;
++
++ case _SC_LEVEL1_DCACHE_LINESIZE:
++ return ecx & 0xff;
++
++ case _SC_LEVEL2_CACHE_SIZE:
++ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
++
++ case _SC_LEVEL2_CACHE_ASSOC:
++ switch ((ecx >> 12) & 0xf)
++ {
++ case 0:
++ case 1:
++ case 2:
++ case 4:
++ return (ecx >> 12) & 0xf;
++ case 6:
++ return 8;
++ case 8:
++ return 16;
++ case 10:
++ return 32;
++ case 11:
++ return 48;
++ case 12:
++ return 64;
++ case 13:
++ return 96;
++ case 14:
++ return 128;
++ case 15:
++ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
++ default:
++ return 0;
++ }
++
++ case _SC_LEVEL2_CACHE_LINESIZE:
++ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
++
++ case _SC_LEVEL3_CACHE_SIZE:
++ {
++ long int total_l3_cache = 0, l3_cache_per_thread = 0;
++ unsigned int threads = 0;
++ const struct cpu_features *cpu_features;
++
++ if ((edx & 0xf000) == 0)
++ return 0;
++
++ total_l3_cache = (edx & 0x3ffc0000) << 1;
++ cpu_features = __get_cpu_features ();
++
++ /* Figure out the number of logical threads that share L3. */
++ if (max_cpuid >= 0x80000008)
++ {
++ /* Get width of APIC ID. */
++ __cpuid (0x80000008, eax, ebx, ecx, edx);
++ threads = (ecx & 0xff) + 1;
++ }
++
++ if (threads == 0)
++ {
++ /* If APIC ID width is not available, use logical
++ processor count. */
++ __cpuid (0x00000001, eax, ebx, ecx, edx);
++ if ((edx & (1 << 28)) != 0)
++ threads = (ebx >> 16) & 0xff;
++ }
++
++ /* Cap usage of highest cache level to the number of
++ supported threads. */
++ if (threads > 0)
++ l3_cache_per_thread = total_l3_cache/threads;
++
++ /* Get shared cache per ccx for Zen architectures. */
++ if (cpu_features->basic.family >= 0x17)
++ {
++ long int l3_cache_per_ccx = 0;
++ /* Get the number of threads that share the L3 cache in a CCX. */
++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
++ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx;
++ return l3_cache_per_ccx;
++ }
++ else
++ {
++ return l3_cache_per_thread;
++ }
++ }
++
+ case _SC_LEVEL3_CACHE_ASSOC:
+- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
+- case _SC_LEVEL1_ICACHE_LINESIZE:
+- case _SC_LEVEL1_DCACHE_LINESIZE:
+- case _SC_LEVEL2_CACHE_LINESIZE:
++ switch ((edx >> 12) & 0xf)
++ {
++ case 0:
++ case 1:
++ case 2:
++ case 4:
++ return (edx >> 12) & 0xf;
++ case 6:
++ return 8;
++ case 8:
++ return 16;
++ case 10:
++ return 32;
++ case 11:
++ return 48;
++ case 12:
++ return 64;
++ case 13:
++ return 96;
++ case 14:
++ return 128;
++ case 15:
++ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
++ default:
++ return 0;
++ }
++
+ case _SC_LEVEL3_CACHE_LINESIZE:
+- return ecx ? (ebx & 0xfff) + 1 : 0;
+- case _SC_LEVEL1_ICACHE_SIZE:
+- case _SC_LEVEL1_DCACHE_SIZE:
+- case _SC_LEVEL2_CACHE_SIZE:
+- case _SC_LEVEL3_CACHE_SIZE:
+- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
++ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
++
+ default:
+ __builtin_unreachable ();
+ }
+@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+ data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
+ core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+ shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+- shared_per_thread = shared;
+
+ level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
+ level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
+@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+ level3_cache_size = shared;
+ level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
+ level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
++ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE);
+
+ if (shared <= 0)
+- /* No shared L3 cache. All we have is the L2 cache. */
+- shared = core;
++ {
++ /* No shared L3 cache. All we have is the L2 cache. */
++ shared = core;
++ }
++ else if (cpu_features->basic.family < 0x17)
++ {
++ /* Account for exclusive L2 and L3 caches. */
++ shared += core;
++ }
+
+- if (shared_per_thread <= 0)
+- shared_per_thread = shared;
++ shared_per_thread = shared;
+ }
+
+ cpu_features->level1_icache_size = level1_icache_size;
+--
+2.33.0
+
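
The patch above implements a two-tier probe: prefer CPUID leaf 0x8000001D
for cache topology, and fall back to the legacy leaves 0x80000005/0x80000006
when the extended leaf is unsupported or reports zeroes. The standalone C
sketch below mirrors that detection logic for the L1 instruction cache only.
It is an illustration of the approach, not the glibc code itself (glibc uses
its own __cpuid/__cpuid_count wrappers and _SC_* names), and the file name
amd-cache-probe.c is made up for the example.

/* amd-cache-probe.c: minimal sketch of the fallback strategy used by
   this patch, probing only the L1 instruction cache.
   Build: gcc -O2 -o amd-cache-probe amd-cache-probe.c  */
#include <cpuid.h>   /* GCC/Clang x86: __cpuid, __cpuid_count macros */
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;
  unsigned int max_cpuid;

  /* Highest supported extended CPUID leaf (same probe the patch adds).  */
  __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);

  if (max_cpuid >= 0x8000001D)
    {
      /* On AMD, subleaf 1 of leaf 0x8000001D describes the L1I cache.  */
      __cpuid_count (0x8000001D, 0x1, eax, ebx, ecx, edx);
      if (ecx != 0)
        {
          /* Same decoding as the patch: ways * line size * sets.  */
          unsigned int ways = ((ebx >> 22) & 0x3ff) + 1;
          unsigned int line = (ebx & 0xfff) + 1;
          unsigned int sets = ecx + 1;
          printf ("L1I: %u bytes (leaf 0x8000001D)\n", ways * line * sets);
          return 0;
        }
      /* Zeroed leaf (e.g. a misconfigured hypervisor): fall through.  */
    }

  if (max_cpuid >= 0x80000005)
    {
      /* Legacy leaf 0x80000005: EDX[31:24] holds the L1I size in KiB.  */
      __cpuid (0x80000005, eax, ebx, ecx, edx);
      printf ("L1I: %u KiB (legacy leaf 0x80000005)\n", (edx >> 24) & 0xff);
    }
  return 0;
}

handle_amd() generalizes the same pattern: it selects the 0x8000001D subleaf
from the requested name (0x0 L1D, 0x1 L1I, 0x2 L2, 0x3 L3) and, on the
legacy path, decodes L2/L3 geometry from leaf 0x80000006 instead.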