summaryrefslogtreecommitdiff
path: root/5f046d1a-VT-d-optimize-CPU-cache-sync.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-10-12 04:00:49 +0000
committerCoprDistGit <infra@openeuler.org>2023-10-12 04:00:49 +0000
commitc22f60e6e55f1bf300dd76d2222a93911f3b2bb2 (patch)
treeef665e7018377f53612ac2751dcaea35a1c587b6 /5f046d1a-VT-d-optimize-CPU-cache-sync.patch
parent39a4763249cd6289e5019acfe0c98dbb169f5f2e (diff)
automatic import of xenopeneuler22.03_LTS
Diffstat (limited to '5f046d1a-VT-d-optimize-CPU-cache-sync.patch')
-rw-r--r--5f046d1a-VT-d-optimize-CPU-cache-sync.patch95
1 files changed, 95 insertions, 0 deletions
diff --git a/5f046d1a-VT-d-optimize-CPU-cache-sync.patch b/5f046d1a-VT-d-optimize-CPU-cache-sync.patch
new file mode 100644
index 0000000..3eb5378
--- /dev/null
+++ b/5f046d1a-VT-d-optimize-CPU-cache-sync.patch
@@ -0,0 +1,95 @@
+# Commit a64ea16522a73a13a0d66cfa4b66a9d3b95dd9d6
+# Date 2020-07-07 14:39:54 +0200
+# Author Roger Pau Monné <roger.pau@citrix.com>
+# Committer Jan Beulich <jbeulich@suse.com>
+vtd: optimize CPU cache sync
+
+Some VT-d IOMMUs are non-coherent, which requires a cache write back
+in order for the changes made by the CPU to be visible to the IOMMU.
+This cache write back was unconditionally done using clflush, but there are
+other more efficient instructions to do so, hence implement support
+for them using the alternative framework.
+
+This is part of XSA-321.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/drivers/passthrough/vtd/extern.h
++++ b/xen/drivers/passthrough/vtd/extern.h
+@@ -68,7 +68,6 @@ int __must_check qinval_device_iotlb_syn
+ u16 did, u16 size, u64 addr);
+
+ unsigned int get_cache_line_size(void);
+-void cacheline_flush(char *);
+ void flush_all_cache(void);
+
+ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -31,6 +31,7 @@
+ #include <xen/pci_regs.h>
+ #include <xen/keyhandler.h>
+ #include <asm/msi.h>
++#include <asm/nops.h>
+ #include <asm/irq.h>
+ #include <asm/hvm/vmx/vmx.h>
+ #include <asm/p2m.h>
+@@ -154,7 +155,42 @@ static void sync_cache(const void *addr,
+
+ addr -= (unsigned long)addr & (clflush_size - 1);
+ for ( ; addr < end; addr += clflush_size )
+- cacheline_flush((char *)addr);
++/*
++ * The arguments to a macro must not include preprocessor directives. Doing so
++ * results in undefined behavior, so we have to create some defines here in
++ * order to avoid it.
++ */
++#if defined(HAVE_AS_CLWB)
++# define CLWB_ENCODING "clwb %[p]"
++#elif defined(HAVE_AS_XSAVEOPT)
++# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
++#else
++# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
++#endif
++
++#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
++#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
++# define INPUT BASE_INPUT
++#else
++# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
++#endif
++ /*
++ * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
++ * + prefix than a clflush + nop, and hence the prefix is added instead
++ * of letting the alternative framework fill the gap by appending nops.
++ */
++ alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
++ "data16 clflush %[p]", /* clflushopt */
++ X86_FEATURE_CLFLUSHOPT,
++ CLWB_ENCODING,
++ X86_FEATURE_CLWB, /* no outputs */,
++ INPUT(addr));
++#undef INPUT
++#undef BASE_INPUT
++#undef CLWB_ENCODING
++
++ alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
++ "sfence", X86_FEATURE_CLWB);
+ }
+
+ /* Allocate page table, return its machine address */
+--- a/xen/drivers/passthrough/vtd/x86/vtd.c
++++ b/xen/drivers/passthrough/vtd/x86/vtd.c
+@@ -51,11 +51,6 @@ unsigned int get_cache_line_size(void)
+ return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
+ }
+
+-void cacheline_flush(char * addr)
+-{
+- clflush(addr);
+-}
+-
+ void flush_all_cache()
+ {
+ wbinvd();