untrusted comment: signature from openbsd 6.1 base secret key
RWQEQa33SgQSEk3iUQjZBwKN4yXDFvjYuePYUJ88IGAlHIYDhp9eYqQik/wsL4UjnTymi1j3Orp4BPFWWEU3y7O/IjBA1IGUZgg=

OpenBSD 6.1 errata 002, May 2, 2017:

vmm(4) mismanaged floating point contexts.

Apply by doing:
    signify -Vep /etc/signify/openbsd-61-base.pub -x 002_vmmfpu.patch.sig \
        -m - | (cd /usr/src && patch -p0)

And then rebuild and install a new kernel:
    KK=`sysctl -n kern.osversion | cut -d# -f1`
    cd /usr/src/sys/arch/`machine`/compile/$KK
    make obj
    make config
    make
    make install
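
Background: the x87/MMX/SSE/AVX registers are lazily switched hardware
state; whatever one context leaves in them stays on the CPU until it is
explicitly saved. The stand-alone user-space sketch below (amd64, gcc or
clang; illustrative only, not part of the patch) shows the fxsave/fxrstor
discipline the fix imposes on vmm(4) around every guest entry:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* fxsave needs a 512-byte, 16-byte-aligned save area */
    struct fxsave_area { uint8_t data[512]; } __attribute__((aligned(16)));
    static struct fxsave_area ctx_a, ctx_b;

    int
    main(void)
    {
        double x = 1.5;

        __asm volatile("fldl %0" : : "m" (x));        /* context A dirties x87 */
        __asm volatile("fxsave %0" : "=m" (ctx_a));   /* save A before a switch */
        __asm volatile("fninit");                     /* context B starts clean */
        __asm volatile("fxsave %0" : "=m" (ctx_b));
        __asm volatile("fxrstor %0" : : "m" (ctx_a)); /* switch back to A */

        printf("saved contexts differ: %s\n",
            memcmp(&ctx_a, &ctx_b, sizeof(ctx_a)) ? "yes" : "no");
        return 0;
    }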

Index: sys/arch/amd64/amd64/fpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
--- sys/arch/amd64/amd64/fpu.c	21 Apr 2016 22:08:27 -0000	1.33
+++ sys/arch/amd64/amd64/fpu.c	27 Apr 2017 06:16:39 -0000	1.34
@@ -74,41 +74,10 @@
  * state is saved.
  */
 
-#define	fninit()		__asm("fninit")
-#define fwait()			__asm("fwait")
-#define fnclex()		__asm("fnclex")
-#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
-#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
-#define	ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))
-#define fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
-#define	clts()			__asm("clts")
-#define	stts()			lcr0(rcr0() | CR0_TS)
-
 /*
  * The mask of enabled XSAVE features.
  */
 uint64_t	xsave_mask;
-
-static inline void
-xsave(struct savefpu *addr, uint64_t mask)
-{
-	uint32_t lo, hi;
-
-	lo = mask;
-	hi = mask >> 32;
-	__asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
-	    "memory");
-}
-
-static inline void
-xrstor(struct savefpu *addr, uint64_t mask)
-{
-	uint32_t lo, hi;
-
-	lo = mask;
-	hi = mask >> 32;
-	__asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
-}
 
 void fpudna(struct cpu_info *);
 static int x86fpflags_to_siginfo(u_int32_t);
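
Note: the macros and inlines deleted above are not dropped from the
kernel; they reappear unchanged in the sys/arch/amd64/include/fpu.h hunk
later in this patch, so that vmm.c can share them. One detail they
encode: XSAVE and XRSTOR take their 64-bit feature mask in %edx:%eax,
which is why both inlines split the mask. A trivial stand-alone check of
that split (illustrative only):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t mask = 0x7;      /* x87 | SSE | AVX, a typical xsave_mask */
        uint32_t lo = mask;       /* goes to %eax */
        uint32_t hi = mask >> 32; /* goes to %edx */

        assert((((uint64_t)hi << 32) | lo) == mask);
        printf("eax=0x%x edx=0x%x\n", lo, hi);
        return 0;
    }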
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
--- sys/arch/amd64/amd64/vmm.c	26 Apr 2017 09:53:28 -0000	1.133
+++ sys/arch/amd64/amd64/vmm.c	27 Apr 2017 06:16:39 -0000	1.134
@@ -23,6 +23,7 @@
 #include <sys/device.h>
 #include <sys/pool.h>
 #include <sys/proc.h>
+#include <sys/user.h>
 #include <sys/ioctl.h>
 #include <sys/queue.h>
 #include <sys/rwlock.h>
@@ -31,6 +32,7 @@
 
 #include <uvm/uvm_extern.h>
 
+#include <machine/fpu.h>
 #include <machine/pmap.h>
 #include <machine/biosvar.h>
 #include <machine/segments.h>
@@ -145,6 +147,7 @@ int vcpu_vmx_check_cap(struct vcpu *, ui
 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
 int vmx_get_exit_info(uint64_t *, uint64_t *);
 int vmx_handle_exit(struct vcpu *);
+int vmx_handle_xsetbv(struct vcpu *);
 int vmm_handle_cpuid(struct vcpu *);
 int vmx_handle_rdmsr(struct vcpu *);
 int vmx_handle_wrmsr(struct vcpu *);
@@ -360,7 +363,7 @@ vmm_attach(struct device *parent, struct
 
 	pool_init(&vm_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
 	    "vmpool", NULL);
-	pool_init(&vcpu_pool, sizeof(struct vcpu), 0, IPL_NONE, PR_WAITOK,
+	pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_NONE, PR_WAITOK,
 	    "vcpupl", NULL);
 
 	vmm_softc = sc;
@@ -2373,6 +2376,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
 	/* XXX CR0 shadow */
 	/* XXX CR4 shadow */
 
+	/* xcr0 power on default sets bit 0 (x87 state) */
+	vcpu->vc_gueststate.vg_xcr0 = XCR0_X87;
+
 	/* Flush the VMCS */
 	if (vmclear(&vcpu->vc_control_pa)) {
 		ret = EINVAL;
@@ -2498,7 +2504,7 @@ vcpu_init_vmx(struct vcpu *vcpu)
 	}
 
 	/* Host CR0 */
-	cr0 = rcr0();
+	cr0 = rcr0() & ~CR0_TS;
 	if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
 		ret = EINVAL;
 		goto exit;
@@ -3354,6 +3360,42 @@ vcpu_must_stop(struct vcpu *vcpu)
 }
 
 /*
+ * vmm_fpusave
+ *
+ * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
+ * and does not call splipi/splx. Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(void)
+{
+	struct proc *p;
+	struct cpu_info *ci = curcpu();
+
+	p = ci->ci_fpcurproc;
+	if (p == NULL)
+		return;
+
+	if (ci->ci_fpsaving != 0)
+		panic("%s: recursive save!", __func__);
+	/*
+	 * Set ci->ci_fpsaving, so that any pending exception will be
+	 * thrown away.  (It will be caught again if/when the FPU
+	 * state is restored.)
+	 */
+	ci->ci_fpsaving = 1;
+	if (xsave_mask)
+		xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
+	else
+		fxsave(&p->p_addr->u_pcb.pcb_savefpu);
+	ci->ci_fpsaving = 0;
+
+	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+
+	p->p_addr->u_pcb.pcb_fpcpu = NULL;
+	ci->ci_fpcurproc = NULL;
+}
+
+/*
  * vcpu_run_vmx
  *
  * VMX main loop used to run a VCPU.
@@ -3404,6 +3446,8 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			break;
 		case VMX_EXIT_CPUID:
 			break;
+		case VMX_EXIT_XSETBV:
+			break;
 #ifdef VMM_DEBUG
 		case VMX_EXIT_TRIPLE_FAULT:
 			DPRINTF("%s: vm %d vcpu %d triple fault\n",
@@ -3528,10 +3572,76 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 
 		/* Start / resume the VCPU */
 		KERNEL_ASSERT_LOCKED();
+
+		/* Disable interrupts and save the current FPU state. */
+		disable_intr();
+		clts();
+		vmm_fpusave();
+
+		/* Initialize the guest FPU if not inited already */
+		if (!vcpu->vc_fpuinited) {
+			fninit();
+			bzero(&vcpu->vc_g_fpu.fp_fxsave,
+			    sizeof(vcpu->vc_g_fpu.fp_fxsave));
+			vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
+			    __INITIAL_NPXCW__;
+			vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
+			    __INITIAL_MXCSR__;
+			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
+			vcpu->vc_fpuinited = 1;
+		}
+
+		if (xsave_mask) {
+			/* Restore guest XCR0 and FPU context */
+			if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+				DPRINTF("%s: guest attempted to set invalid "
+				    "bits in xcr0\n", __func__);
+				ret = EINVAL;
+				stts();
+				enable_intr();
+				break;
+			}
+
+			/* Restore guest %xcr0 */
+			xrstor(&vcpu->vc_g_fpu, xsave_mask);
+			xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
+		} else
+			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
 		KERNEL_UNLOCK();
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
 		    &vcpu->vc_gueststate, resume);
 
+		/*
+		 * On exit, interrupts are disabled, and we are running with
+		 * the guest FPU state still possibly on the CPU. Save the FPU
+		 * state before re-enabling interrupts.
+		 */
+		if (xsave_mask) {
+			/* Save guest %xcr0 */
+			vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+			/* Restore host %xcr0 */
+			xsetbv(0, xsave_mask);
+
+			/*
+			 * Save full copy of FPU state - guest content is
+			 * always a subset of host's save area (see xsetbv
+			 * exit handler)
+			 */	
+			xsave(&vcpu->vc_g_fpu, xsave_mask);
+		} else
+			fxsave(&vcpu->vc_g_fpu);
+
+		/*
+		 * FPU state is invalid, set CR0_TS to force DNA trap on next
+		 * access.
+		 */
+		stts();
+
+		enable_intr();
+
 		exit_reason = VM_EXIT_NONE;
 		if (ret == 0) {
 			/*
@@ -3545,6 +3655,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 				printf("%s: can't read guest rflags during "
 				    "exit\n", __func__);
 				ret = EINVAL;
+				KERNEL_LOCK();
 				break;
                         }
 		}
@@ -3826,6 +3937,10 @@ vmx_handle_exit(struct vcpu *vcpu)
 		ret = vmx_handle_wrmsr(vcpu);
 		update_rip = 1;
 		break;
+	case VMX_EXIT_XSETBV:
+		ret = vmx_handle_xsetbv(vcpu);
+		update_rip = 1;
+		break;
 	case VMX_EXIT_TRIPLE_FAULT:
 #ifdef VMM_DEBUG
 		DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
@@ -4351,6 +4466,62 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
 }
 
 /*
+ * vmx_handle_xsetbv
+ *
+ * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
+ * limited to the xsave_mask in use in the host.
+ *
+ * Parameters:
+ *  vcpu: vcpu structure containing instruction info causing the exit
+ *
+ * Return value:
+ *  0: The operation was successful
+ *  EINVAL: An error occurred
+ */
+int
+vmx_handle_xsetbv(struct vcpu *vcpu)
+{
+	uint64_t insn_length;
+	uint64_t *rax, *rdx, *rcx;
+
+	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
+		printf("%s: can't obtain instruction length\n", __func__);
+		return (EINVAL);
+	}
+
+	/* All XSETBV instructions are 0x0F 0x01 0xD1 */
+	KASSERT(insn_length == 3);
+
+	rax = &vcpu->vc_gueststate.vg_rax;
+	rcx = &vcpu->vc_gueststate.vg_rcx;
+	rdx = &vcpu->vc_gueststate.vg_rdx;
+
+	if (*rcx != 0) {
+		DPRINTF("%s: guest specified invalid xcr register number "
+		    "%lld\n", __func__, *rcx);
+		/* XXX this should #GP(0) instead of killing the guest */
+		return (EINVAL);
+	}
+
+	/*
+	 * No bits in %edx are currently supported. Check this, and validate
+	 * against the host mask.
+	 */
+	if (*rdx != 0 || (*rax & ~xsave_mask)) {
+		DPRINTF("%s: guest specified invalid xcr0 content "
+		    "(0x%llx:0x%llx)\n", __func__, *rdx, *rax);
+		/* XXX this should #GP(0) instead of killing the guest */
+		return (EINVAL);
+	}
+
+	vcpu->vc_gueststate.vg_xcr0 = *rax;
+
+	vcpu->vc_gueststate.vg_rip += insn_length;
+
+	return (0);
+}
+	
+/*
  * vmx_handle_wrmsr
  *
  * Handler for wrmsr instructions. This handler logs the access, and discards
@@ -4413,6 +4584,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 {
 	uint64_t insn_length;
 	uint64_t *rax, *rbx, *rcx, *rdx;
+	uint32_t eax, ebx, ecx, edx;
 
 	if (vmm_softc->mode == VMM_MODE_VMX ||
 	    vmm_softc->mode == VMM_MODE_EPT) {
@@ -4431,9 +4603,11 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 	rcx = &vcpu->vc_gueststate.vg_rcx;
 	rdx = &vcpu->vc_gueststate.vg_rdx;
 
+	CPUID_LEAF(rax, 0, eax, ebx, ecx, edx);
+
 	switch (*rax) {
 	case 0x00:	/* Max level and vendor ID */
-		*rax = 0x07; /* cpuid_level */
+		*rax = 0x0d; /* cpuid_level */
 		*rbx = *((uint32_t *)&cpu_vendor);
 		*rdx = *((uint32_t *)&cpu_vendor + 1);
 		*rcx = *((uint32_t *)&cpu_vendor + 2);
@@ -4580,13 +4754,19 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 		*rcx = 0;
 		*rdx = 0;
 		break;
-	case 0x0d:	/* Processor ext. state information (not supported) */
-		DPRINTF("%s: function 0x0d (ext. state info) not supported\n",
-		    __func__);
-		*rax = 0;
-		*rbx = 0;
-		*rcx = 0;
-		*rdx = 0;
+	case 0x0d:	/* Processor ext. state information */
+		if (*rcx == 0) {
+			*rax = xsave_mask;
+			*rbx = ebx;
+			*rcx = ecx;
+			*rdx = edx;
+		} else {
+			CPUID_LEAF(rax, *rcx, eax, ebx, ecx, edx);
+			*rax = eax;
+			*rbx = ebx;
+			*rcx = ecx;
+			*rdx = edx;
+		}
 		break;
 	case 0x0f:	/* QoS info (not supported) */
 		DPRINTF("%s: function 0x0f (QoS info) not supported\n",
Index: sys/arch/amd64/amd64/vmm_support.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm_support.S,v
--- sys/arch/amd64/amd64/vmm_support.S	25 Mar 2017 15:25:20 -0000	1.7
+++ sys/arch/amd64/amd64/vmm_support.S	27 Apr 2017 06:16:39 -0000	1.8
@@ -17,6 +17,7 @@
 
 #include "assym.h"
 #include <machine/asm.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 
 /*
@@ -154,6 +155,9 @@ skip_init:
 	 */
 
 	pushfq
+	popq	%rax
+	andq	$(~PSL_I), %rax
+	pushq	%rax
 
 	/*
 	 * Save (possibly) lazy-switched selectors
@@ -354,7 +358,6 @@ restore_host:
 	 * first. This is to accommodate possibly lazy-switched
 	 * selectors from above
 	 */
-	cli
 	popq	%rdx
 	popq	%rax
 	movq	$MSR_KERNELGSBASE, %rcx
@@ -371,7 +374,6 @@ restore_host:
 	popq	%rax
 	movq	$MSR_FSBASE, %rcx
 	wrmsr
-	sti
 
 	popw	%ax
 	movw	%ax, %ss
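
The pushfq change above masks PSL_I out of the saved flags image, so the
popfq on the exit path restores host flags with interrupts still
disabled; enable_intr() in vcpu_run_vmx() turns them back on only after
the guest FPU state has been saved, which is what lets the cli/sti pair
in restore_host go away. The edit itself, restated in stand-alone C
(PSL_I value as defined in psl.h):

    #include <stdint.h>
    #include <stdio.h>

    #define PSL_I 0x00000200ULL /* RFLAGS interrupt-enable flag (IF), bit 9 */

    int
    main(void)
    {
        uint64_t rflags = 0x246;          /* live kernel flags, IF set */
        uint64_t image = rflags & ~PSL_I; /* what the patch now pushes */

        printf("live 0x%llx, pushed image 0x%llx\n",
            (unsigned long long)rflags, (unsigned long long)image);
        return 0;
    }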
Index: sys/arch/amd64/include/cpufunc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
--- sys/arch/amd64/include/cpufunc.h	4 Sep 2016 09:22:28 -0000	1.13
+++ sys/arch/amd64/include/cpufunc.h	27 Apr 2017 06:16:39 -0000	1.14
@@ -333,6 +333,16 @@ xsetbv(uint32_t reg, uint64_t mask)
 	lo = mask;
 	hi = mask >> 32;
 	__asm volatile("xsetbv" :: "c" (reg), "a" (lo), "d" (hi) : "memory");
+}
+
+static __inline uint64_t
+xgetbv(uint32_t reg)
+{
+	uint32_t lo, hi;
+
+	__asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg));
+
+	return (((uint64_t)hi << 32) | (uint64_t)lo);
 }
 
 /* Break into DDB/KGDB. */
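
The new xgetbv() pairs with the existing xsetbv(): %ecx selects the
register and the result arrives split across %edx:%eax. Reading XCR0
(register 0) is unprivileged wherever CR4.OSXSAVE is set, so the inline
can be tried verbatim from user space (illustrative only; raises SIGILL
on pre-XSAVE hardware):

    #include <stdint.h>
    #include <stdio.h>

    static inline uint64_t
    xgetbv(uint32_t reg)
    {
        uint32_t lo, hi;

        __asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg));
        return (((uint64_t)hi << 32) | (uint64_t)lo);
    }

    int
    main(void)
    {
        /* bit 0 (x87) is always set; 0x7 means x87 | SSE | AVX enabled */
        printf("xcr0 = 0x%llx\n", (unsigned long long)xgetbv(0));
        return 0;
    }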
Index: sys/arch/amd64/include/fpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
--- sys/arch/amd64/include/fpu.h	25 Mar 2015 21:05:18 -0000	1.11
+++ sys/arch/amd64/include/fpu.h	27 Apr 2017 06:16:39 -0000	1.12
@@ -70,6 +70,37 @@ void fpusave_proc(struct proc *, int);
 void fpusave_cpu(struct cpu_info *, int);
 void fpu_kernel_enter(void);
 void fpu_kernel_exit(void);
+
+#define fninit()		__asm("fninit")
+#define fwait()			__asm("fwait")
+#define fnclex()		__asm("fnclex")
+#define fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
+#define fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
+#define ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))
+#define fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
+#define clts()			__asm("clts")
+#define stts()			lcr0(rcr0() | CR0_TS)
+
+static inline void
+xsave(struct savefpu *addr, uint64_t mask)
+{
+	uint32_t lo, hi;
+
+	lo = mask;
+	hi = mask >> 32;
+	__asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
+	    "memory");
+}
+
+static inline void
+xrstor(struct savefpu *addr, uint64_t mask)
+{
+	uint32_t lo, hi;
+
+	lo = mask;
+	hi = mask >> 32;
+	__asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
+}
 
 #endif
 
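fldcw(), one of the macros this hunk exports from fpu.c, loads the x87
control word. The instruction is usable from user space as well; on
amd64, flipping the rounding-control field visibly changes an x87
(long double) division, since only long double arithmetic still goes
through the x87 unit there (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        volatile long double a = 1.0L, b = 3.0L;
        uint16_t cw;

        printf("round to nearest: %.21Lf\n", a / b);

        __asm volatile("fnstcw %0" : "=m" (cw));
        cw = (cw & ~0x0c00) | 0x0c00; /* RC field = round toward zero */
        __asm volatile("fldcw %0" : : "m" (cw));

        printf("round toward zero: %.21Lf\n", a / b);
        return 0;
    }
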
Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
--- sys/arch/amd64/include/vmmvar.h	23 Mar 2017 08:05:58 -0000	1.32
+++ sys/arch/amd64/include/vmmvar.h	27 Apr 2017 06:16:39 -0000	1.33
@@ -638,6 +638,7 @@ struct vmx_gueststate
 	uint64_t	vg_rip;			/* 0x80 */
 	uint32_t	vg_exit_reason;		/* 0x88 */
 	uint64_t	vg_rflags;		/* 0x90 */
+	uint64_t	vg_xcr0;		/* 0x98 */
 };
 
 /*
@@ -649,6 +650,12 @@ struct vm;
  * Virtual CPU
  */
 struct vcpu {
+	/*
+	 * Guest FPU state - this must remain as the first member of the struct
+	 * to ensure 64-byte alignment (set up during vcpu_pool init)
+	 */
+	struct savefpu vc_g_fpu;
+
 	/* VMCS / VMCB pointer */
 	vaddr_t vc_control_va;
 	uint64_t vc_control_pa;
@@ -673,6 +680,10 @@ struct vcpu {
 
 	uint16_t vc_intr;
 	uint8_t vc_irqready;
+
+	uint8_t vc_fpuinited;
+
+	uint64_t vc_h_xcr0;
 
 	/* VMX only */
 	uint64_t vc_vmx_basic;
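
The vcpu layout change ties back to the pool_init() change in vmm.c:
XSAVE and XRSTOR fault unless the save area is 64-byte aligned, so vcpu
allocations are now aligned to 64 bytes and vc_g_fpu sits at offset 0 to
inherit that alignment. The layout requirement can be pinned down with a
build-time assert; a sketch (the savefpu size here is a stand-in, not
the kernel's):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    struct savefpu { unsigned char fp_area[832]; }; /* stand-in size */

    struct vcpu {
        struct savefpu vc_g_fpu; /* must remain the first member */
        unsigned long vc_control_pa;
    };

    int
    main(void)
    {
        static_assert(offsetof(struct vcpu, vc_g_fpu) == 0,
            "vc_g_fpu must stay first so pool alignment carries over");
        printf("vc_g_fpu at offset %zu\n", offsetof(struct vcpu, vc_g_fpu));
        return 0;
    }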