diff options
| author | CoprDistGit <infra@openeuler.org> | 2023-10-12 04:00:49 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-10-12 04:00:49 +0000 |
| commit | c22f60e6e55f1bf300dd76d2222a93911f3b2bb2 (patch) | |
| tree | ef665e7018377f53612ac2751dcaea35a1c587b6 /5f046d1a-VT-d-optimize-CPU-cache-sync.patch | |
| parent | 39a4763249cd6289e5019acfe0c98dbb169f5f2e (diff) | |
automatic import of xen openeuler22.03_LTS
Diffstat (limited to '5f046d1a-VT-d-optimize-CPU-cache-sync.patch')
| -rw-r--r-- | 5f046d1a-VT-d-optimize-CPU-cache-sync.patch | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/5f046d1a-VT-d-optimize-CPU-cache-sync.patch b/5f046d1a-VT-d-optimize-CPU-cache-sync.patch new file mode 100644 index 0000000..3eb5378 --- /dev/null +++ b/5f046d1a-VT-d-optimize-CPU-cache-sync.patch @@ -0,0 +1,95 @@ +# Commit a64ea16522a73a13a0d66cfa4b66a9d3b95dd9d6 +# Date 2020-07-07 14:39:54 +0200 +# Author Roger Pau Monné <roger.pau@citrix.com> +# Committer Jan Beulich <jbeulich@suse.com> +vtd: optimize CPU cache sync + +Some VT-d IOMMUs are non-coherent, which requires a cache write back +in order for the changes made by the CPU to be visible to the IOMMU. +This cache write back was unconditionally done using clflush, but there are +other more efficient instructions to do so, hence implement support +for them using the alternative framework. + +This is part of XSA-321. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/drivers/passthrough/vtd/extern.h ++++ b/xen/drivers/passthrough/vtd/extern.h +@@ -68,7 +68,6 @@ int __must_check qinval_device_iotlb_syn + u16 did, u16 size, u64 addr); + + unsigned int get_cache_line_size(void); +-void cacheline_flush(char *); + void flush_all_cache(void); + + uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node); +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -31,6 +31,7 @@ + #include <xen/pci_regs.h> + #include <xen/keyhandler.h> + #include <asm/msi.h> ++#include <asm/nops.h> + #include <asm/irq.h> + #include <asm/hvm/vmx/vmx.h> + #include <asm/p2m.h> +@@ -154,7 +155,42 @@ static void sync_cache(const void *addr, + + addr -= (unsigned long)addr & (clflush_size - 1); + for ( ; addr < end; addr += clflush_size ) +- cacheline_flush((char *)addr); ++/* ++ * The arguments to a macro must not include preprocessor directives. Doing so ++ * results in undefined behavior, so we have to create some defines here in ++ * order to avoid it. 
++ */ ++#if defined(HAVE_AS_CLWB) ++# define CLWB_ENCODING "clwb %[p]" ++#elif defined(HAVE_AS_XSAVEOPT) ++# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ ++#else ++# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ ++#endif ++ ++#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) ++#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) ++# define INPUT BASE_INPUT ++#else ++# define INPUT(addr) "a" (addr), BASE_INPUT(addr) ++#endif ++ /* ++ * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush ++ * + prefix than a clflush + nop, and hence the prefix is added instead ++ * of letting the alternative framework fill the gap by appending nops. ++ */ ++ alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]", ++ "data16 clflush %[p]", /* clflushopt */ ++ X86_FEATURE_CLFLUSHOPT, ++ CLWB_ENCODING, ++ X86_FEATURE_CLWB, /* no outputs */, ++ INPUT(addr)); ++#undef INPUT ++#undef BASE_INPUT ++#undef CLWB_ENCODING ++ ++ alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT, ++ "sfence", X86_FEATURE_CLWB); + } + + /* Allocate page table, return its machine address */ +--- a/xen/drivers/passthrough/vtd/x86/vtd.c ++++ b/xen/drivers/passthrough/vtd/x86/vtd.c +@@ -51,11 +51,6 @@ unsigned int get_cache_line_size(void) + return ((cpuid_ebx(1) >> 8) & 0xff) * 8; + } + +-void cacheline_flush(char * addr) +-{ +- clflush(addr); +-} +- + void flush_all_cache() + { + wbinvd(); |
