author    CoprDistGit <infra@openeuler.org>  2023-10-12 04:00:49 +0000
committer CoprDistGit <infra@openeuler.org>  2023-10-12 04:00:49 +0000
commit    c22f60e6e55f1bf300dd76d2222a93911f3b2bb2 (patch)
tree      ef665e7018377f53612ac2751dcaea35a1c587b6 /5f560c42-x86-PV-64bit-segbase-consistency.patch
parent    39a4763249cd6289e5019acfe0c98dbb169f5f2e (diff)
automatic import of xen (openeuler22.03_LTS)
Diffstat (limited to '5f560c42-x86-PV-64bit-segbase-consistency.patch')
-rw-r--r--  5f560c42-x86-PV-64bit-segbase-consistency.patch | 212
1 file changed, 212 insertions(+), 0 deletions(-)
diff --git a/5f560c42-x86-PV-64bit-segbase-consistency.patch b/5f560c42-x86-PV-64bit-segbase-consistency.patch
new file mode 100644
index 0000000..c068fd3
--- /dev/null
+++ b/5f560c42-x86-PV-64bit-segbase-consistency.patch
@@ -0,0 +1,212 @@
+# Commit a5eaac9245f4f382a3cd0e9710e9d1cba7db20e4
+# Date 2020-09-07 11:32:34 +0100
+# Author Andrew Cooper <andrew.cooper3@citrix.com>
+# Committer Andrew Cooper <andrew.cooper3@citrix.com>
+x86/pv: Fix consistency of 64bit segment bases
+
+The comments in save_segments(), _toggle_guest_pt() and write_cr() are false.
+The %fs and %gs bases can be updated at any time by the guest.
+
+As a consequence, Xen's fs_base/etc tracking state is always stale when the
+vcpu is in context, and must not be used to complete MSR_{FS,GS}_BASE reads, etc.
+
+In particular, a sequence such as:
+
+ wrmsr(MSR_FS_BASE, 0x1ull << 32);
+ write_fs(__USER_DS);
+ base = rdmsr(MSR_FS_BASE);
+
+will return the stale base, not the new base. This may cause a guest kernel's
+context switching of userspace to malfunction.
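+
+For example (an illustrative sketch in the same shorthand as above; "prev",
+"next" and their fields are hypothetical), a guest context switch which saves
+the outgoing task's base via the emulated MSR read records the stale value,
+and later restores the wrong base:
+
+    prev->fs_base = rdmsr(MSR_FS_BASE); /* stale if %fs was reloaded since
+                                         * the last explicit base write    */
+    write_fs(next->fs_sel);             /* load the incoming selector      */
+    wrmsr(MSR_FS_BASE, next->fs_base);  /* restore the incoming base       */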
+
+Therefore:
+ * Update save_segments(), _toggle_guest_pt() and read_msr() to always read
+ the segment bases from hardware.
+ * Update write_cr(), write_msr() and do_set_segment_base() to not waste
+ time caching data which is instantly going to become stale again.
+ * Provide comments explaining when the tracking state is and isn't stale.
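+
+In other words (an illustrative sketch only, not code from this patch), any
+consumer of the cached bases must respect the rule that they are only
+meaningful while the vCPU is out of context; while in context, the hardware
+value is authoritative:
+
+    /* Hypothetical helper, for illustration only. */
+    unsigned long pv_fs_base(const struct vcpu *v)
+    {
+        return v == current ? rdfsbase() : v->arch.pv.fs_base;
+    }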
+
+This bug has been present for 14 years, but several bugfixes since have built
+on and extended the original flawed logic.
+
+Fixes: ba9adb737ba ("Apply stricter checking to RDMSR/WRMSR emulations.")
+Fixes: c42494acb2f ("x86: fix FS/GS base handling when using the fsgsbase feature")
+Fixes: eccc170053e ("x86/pv: Don't have %cr4.fsgsbase active behind a guest kernels back")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1546,6 +1546,16 @@ static void load_segments(struct vcpu *n
+ }
+ }
+
++/*
++ * Record all guest segment state. The guest can load segment selectors
++ * without trapping, which will also alter the 64bit FS/GS bases. Arbitrary
++ * changes to bases can also be made with the WR{FS,GS}BASE instructions, when
++ * enabled.
++ *
++ * Guests however cannot use SWAPGS, so there is no mechanism to modify the
++ * inactive GS base behind Xen's back. Therefore, Xen's copy of the inactive
++ * GS base is still accurate, and doesn't need reading back from hardware.
++ */
+ static void save_segments(struct vcpu *v)
+ {
+ struct cpu_user_regs *regs = &v->arch.user_regs;
+@@ -1556,14 +1566,15 @@ static void save_segments(struct vcpu *v
+ regs->fs = read_sreg(fs);
+ regs->gs = read_sreg(gs);
+
+- /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
+- if ( (read_cr4() & X86_CR4_FSGSBASE) && !is_pv_32bit_vcpu(v) )
++ if ( !is_pv_32bit_vcpu(v) )
+ {
+- v->arch.pv.fs_base = __rdfsbase();
++ unsigned long gs_base = rdgsbase();
++
++ v->arch.pv.fs_base = rdfsbase();
+ if ( v->arch.flags & TF_kernel_mode )
+- v->arch.pv.gs_base_kernel = __rdgsbase();
++ v->arch.pv.gs_base_kernel = gs_base;
+ else
+- v->arch.pv.gs_base_user = __rdgsbase();
++ v->arch.pv.gs_base_user = gs_base;
+ }
+
+ if ( regs->ds )
+--- a/xen/arch/x86/pv/domain.c
++++ b/xen/arch/x86/pv/domain.c
+@@ -408,16 +408,19 @@ static void _toggle_guest_pt(struct vcpu
+
+ void toggle_guest_mode(struct vcpu *v)
+ {
++ unsigned long gs_base;
++
+ ASSERT(!is_pv_32bit_vcpu(v));
+
+- /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
+- if ( read_cr4() & X86_CR4_FSGSBASE )
+- {
+- if ( v->arch.flags & TF_kernel_mode )
+- v->arch.pv.gs_base_kernel = __rdgsbase();
+- else
+- v->arch.pv.gs_base_user = __rdgsbase();
+- }
++ /*
++ * Update the cached value of the GS base about to become inactive, as a
++ * subsequent context switch won't bother re-reading it.
++ */
++ gs_base = rdgsbase();
++ if ( v->arch.flags & TF_kernel_mode )
++ v->arch.pv.gs_base_kernel = gs_base;
++ else
++ v->arch.pv.gs_base_user = gs_base;
+ asm volatile ( "swapgs" );
+
+ _toggle_guest_pt(v);
+--- a/xen/arch/x86/pv/emul-priv-op.c
++++ b/xen/arch/x86/pv/emul-priv-op.c
+@@ -779,17 +779,6 @@ static int write_cr(unsigned int reg, un
+ }
+
+ case 4: /* Write CR4 */
+- /*
+- * If this write will disable FSGSBASE, refresh Xen's idea of the
+- * guest bases now that they can no longer change.
+- */
+- if ( (curr->arch.pv.ctrlreg[4] & X86_CR4_FSGSBASE) &&
+- !(val & X86_CR4_FSGSBASE) )
+- {
+- curr->arch.pv.fs_base = __rdfsbase();
+- curr->arch.pv.gs_base_kernel = __rdgsbase();
+- }
+-
+ curr->arch.pv.ctrlreg[4] = pv_fixup_guest_cr4(curr, val);
+ write_cr4(pv_make_cr4(curr));
+ ctxt_switch_levelling(curr);
+@@ -838,15 +827,13 @@ static int read_msr(unsigned int reg, ui
+ case MSR_FS_BASE:
+ if ( is_pv_32bit_domain(currd) )
+ break;
+- *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdfsbase()
+- : curr->arch.pv.fs_base;
++ *val = rdfsbase();
+ return X86EMUL_OKAY;
+
+ case MSR_GS_BASE:
+ if ( is_pv_32bit_domain(currd) )
+ break;
+- *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdgsbase()
+- : curr->arch.pv.gs_base_kernel;
++ *val = rdgsbase();
+ return X86EMUL_OKAY;
+
+ case MSR_SHADOW_GS_BASE:
+@@ -975,14 +962,12 @@ static int write_msr(unsigned int reg, u
+ if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
+ break;
+ wrfsbase(val);
+- curr->arch.pv.fs_base = val;
+ return X86EMUL_OKAY;
+
+ case MSR_GS_BASE:
+ if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
+ break;
+ wrgsbase(val);
+- curr->arch.pv.gs_base_kernel = val;
+ return X86EMUL_OKAY;
+
+ case MSR_SHADOW_GS_BASE:
+--- a/xen/arch/x86/x86_64/mm.c
++++ b/xen/arch/x86/x86_64/mm.c
+@@ -1027,10 +1027,7 @@ long do_set_segment_base(unsigned int wh
+ {
+ case SEGBASE_FS:
+ if ( is_canonical_address(base) )
+- {
+ wrfsbase(base);
+- v->arch.pv.fs_base = base;
+- }
+ else
+ ret = -EINVAL;
+ break;
+@@ -1047,10 +1044,7 @@ long do_set_segment_base(unsigned int wh
+
+ case SEGBASE_GS_KERNEL:
+ if ( is_canonical_address(base) )
+- {
+ wrgsbase(base);
+- v->arch.pv.gs_base_kernel = base;
+- }
+ else
+ ret = -EINVAL;
+ break;
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -505,7 +505,24 @@ struct pv_vcpu
+ bool_t syscall32_disables_events;
+ bool_t sysenter_disables_events;
+
+- /* Segment base addresses. */
++ /*
++ * 64bit segment bases.
++ *
++ * FS and the active GS are always stale when the vCPU is in context, as
++ * the guest can change them behind Xen's back with MOV SREG, or
++ * WR{FS,GS}BASE on capable hardware.
++ *
++ * The inactive GS base is never stale, as guests can't use SWAPGS to
++ * access it - all modification is performed by Xen either directly
++ * (hypercall, #GP emulation), or indirectly (toggle_guest_mode()).
++ *
++ * The vCPU context switch path is optimised based on this fact, so any
++ * path updating or swapping the inactive base must update the cached
++ * value as well.
++ *
++ * Which GS base is active and inactive depends on whether the vCPU is in
++ * user or kernel context.
++ */
+ unsigned long fs_base;
+ unsigned long gs_base_kernel;
+ unsigned long gs_base_user;