untrusted comment: signature from openbsd 6.2 base secret key
RWRVWzAMgtyg7h6Z/ES+ftCrC3y4jz05b9Q4N4uIZDqQEzb7lw6vB6BGumpp3us1ydI/8HGsYSlzPUl7ai/pMISPf6LswZDJZAI=

OpenBSD 6.2 errata 017, June 24, 2018:

Intel CPUs speculatively access FPU registers even when the FPU is
disabled, so data (including AES keys) from previous contexts could be
discovered when the lazy-save approach is in use.  Switch to an
eager-save approach.
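
For illustration only (this note and sketch are not part of the signed
patch): under the lazy model, user FPU state was left in the registers
and saved only when a CR0_TS/#DNA trap fired; under the eager model,
state is saved and the registers scrubbed before anything else runs, so
kernel code that uses the FPU must bracket that use explicitly.  A
minimal C sketch of the kernel-side pattern, using fpu_kernel_enter(),
fpu_kernel_exit() and aesni_enc() from this patch (the wrapper function
itself is hypothetical):

    void
    example_kernel_aes(struct aesni_session *ses, uint8_t *dst, uint8_t *src)
    {
            /* save curproc's xstate if the CPU still holds it */
            fpu_kernel_enter();

            /* SSE/AES-NI instructions are now safe to execute */
            aesni_enc(ses, dst, src);

            /* reset the FPU so no key material stays in the registers */
            fpu_kernel_exit();
    }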

Apply by doing:
    signify -Vep /etc/signify/openbsd-62-base.pub -x 017_intelfpu.patch.sig \
        -m - | (cd /usr/src && patch -p0)

And then rebuild and install the kernel:
    KK=`sysctl -n kern.osversion | cut -d# -f1`
    cd /usr/src/sys/arch/`machine`/compile/$KK
    make obj
    make config
    make
    make install

Index: sys/arch/amd64/amd64/acpi_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
retrieving revision 1.78
diff -u -p -r1.78 acpi_machdep.c
--- sys/arch/amd64/amd64/acpi_machdep.c	27 Mar 2017 18:32:53 -0000	1.78
+++ sys/arch/amd64/amd64/acpi_machdep.c	21 Jun 2018 11:54:01 -0000
@@ -389,7 +389,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, in
 	 */
 	if (acpi_savecpu()) {
 		/* Suspend path */
-		fpusave_cpu(curcpu(), 1);
+		KASSERT((curcpu()->ci_flags & CPUF_USERXSTATE) == 0);
 		wbinvd();
 
 #ifdef HIBERNATE
@@ -416,6 +416,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, in
 		return (ECANCELED);
 	}
 	/* Resume path */
+	fpureset();
 
 	/* Reset the vectors */
 	sc->sc_facs->wakeup_vector = 0;
Index: sys/arch/amd64/amd64/acpi_wakecode.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_wakecode.S,v
retrieving revision 1.41
diff -u -p -r1.41 acpi_wakecode.S
--- sys/arch/amd64/amd64/acpi_wakecode.S	30 Aug 2017 23:40:22 -0000	1.41
+++ sys/arch/amd64/amd64/acpi_wakecode.S	21 Jun 2018 11:54:01 -0000
@@ -217,7 +217,7 @@ _C_LABEL(acpi_protected_mode_resume):
 
 	/* Reenable paging by setting the appropriate bits in CR0 */
 	movl    %cr0,%eax
-	orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%eax
+	orl	$CR0_DEFAULT,%eax
 	movl    %eax,%cr0
 
 	/* Flush the prefetch queue again */
Index: sys/arch/amd64/amd64/aesni.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/aesni.c,v
retrieving revision 1.42
diff -u -p -r1.42 aesni.c
--- sys/arch/amd64/amd64/aesni.c	8 Sep 2017 05:36:51 -0000	1.42
+++ sys/arch/amd64/amd64/aesni.c	21 Jun 2018 11:54:01 -0000
@@ -256,7 +256,9 @@ aesni_newsession(u_int32_t *sidp, struct
 			bzero(ses->ses_ghash->Z, GMAC_BLOCK_LEN);
 
 			/* prepare a hash subkey */
+			fpu_kernel_enter();
 			aesni_enc(ses, ses->ses_ghash->H, ses->ses_ghash->H);
+			fpu_kernel_exit();
 			break;
 
 		case CRYPTO_MD5_HMAC:
Index: sys/arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.49
diff -u -p -r1.49 autoconf.c
--- sys/arch/amd64/amd64/autoconf.c	20 Jun 2017 21:05:46 -0000	1.49
+++ sys/arch/amd64/amd64/autoconf.c	21 Jun 2018 11:54:01 -0000
@@ -138,10 +138,6 @@ cpu_configure(void)
 
 	unmap_startup();
 
-#ifdef MULTIPROCESSOR
-	cpu_init_idle_pcbs();
-#endif
-
 	lcr8(0);
 	spl0();
 	cold = 0;
Index: sys/arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.107.2.1
diff -u -p -r1.107.2.1 cpu.c
--- sys/arch/amd64/amd64/cpu.c	26 Feb 2018 12:29:48 -0000	1.107.2.1
+++ sys/arch/amd64/amd64/cpu.c	21 Jun 2018 11:54:01 -0000
@@ -70,6 +70,7 @@
 #include "pvbus.h"
 
 #include <sys/param.h>
+#include <sys/proc.h>
 #include <sys/timeout.h>
 #include <sys/systm.h>
 #include <sys/device.h>
@@ -77,6 +78,7 @@
 #include <sys/memrange.h>
 #include <dev/rndvar.h>
 #include <sys/atomic.h>
+#include <sys/user.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -409,7 +411,6 @@ cpu_attach(struct device *parent, struct
 	pcb->pcb_kstack = kstack + USPACE - 16;
 	pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE - 16;
 	pcb->pcb_pmap = pmap_kernel();
-	pcb->pcb_cr0 = rcr0();
 	pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
 #endif
 
@@ -491,6 +492,28 @@ cpu_attach(struct device *parent, struct
 #endif /* NVMM > 0 */
 }
 
+static void
+replacexsave(void)
+{
+	extern long _xrstor, _xsave, _xsaveopt;
+	u_int32_t eax, ebx, ecx, edx;
+	static int replacedone = 0;
+	int s;
+
+	if (replacedone)
+		return;
+	replacedone = 1;
+
+	/* find out whether xsaveopt is supported */
+	CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
+	s = splhigh();
+	codepatch_replace(CPTAG_XRSTOR, &_xrstor, 4);
+	codepatch_replace(CPTAG_XSAVE,
+	    (eax & XSAVE_XSAVEOPT) ? &_xsaveopt : &_xsave, 4);
+	splx(s);
+}
+
+
 /*
  * Initialize the processor appropriately.
  */
@@ -498,6 +521,7 @@ cpu_attach(struct device *parent, struct
 void
 cpu_init(struct cpu_info *ci)
 {
+	struct savefpu *sfp;
 	u_int cr4;
 
 	/* configure the CPU if needed */
@@ -509,7 +533,6 @@ cpu_init(struct cpu_info *ci)
 	 */
 	patinit(ci);
 
-	lcr0(rcr0() | CR0_WP);
 	cr4 = rcr4() | CR4_DEFAULT;
 	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP)
 		cr4 |= CR4_SMEP;
@@ -519,7 +542,7 @@ cpu_init(struct cpu_info *ci)
 		cr4 |= CR4_FSGSBASE;
 	if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP)
 		cr4 |= CR4_UMIP;
-	if (cpu_ecxfeature & CPUIDECX_XSAVE)
+	if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
 		cr4 |= CR4_OSXSAVE;
 	lcr4(cr4);
 
@@ -532,9 +555,25 @@ cpu_init(struct cpu_info *ci)
 			xsave_mask |= XCR0_AVX;
 		xsetbv(0, xsave_mask);
 		CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
-		fpu_save_len = ebx;
+		if (CPU_IS_PRIMARY(ci)) {
+			fpu_save_len = ebx;
+			KASSERT(fpu_save_len <= sizeof(struct savefpu));
+		} else {
+			KASSERT(ebx == fpu_save_len);
+		}
+
+		replacexsave();
 	}
 
+	/* Give proc0 a clean FPU save area */
+	sfp = &proc0.p_addr->u_pcb.pcb_savefpu;
+	memset(sfp, 0, fpu_save_len);
+	if (xsave_mask) {
+		/* must not use xsaveopt here */
+		xsave(sfp, xsave_mask);
+	} else
+		fxsave(sfp);
+
 #if NVMM > 0
 	/* Re-enable VMM if needed */
 	if (ci->ci_flags & CPUF_VMM)
@@ -602,24 +641,6 @@ cpu_boot_secondary_processors(void)
 }
 
 void
-cpu_init_idle_pcbs(void)
-{
-	struct cpu_info *ci;
-	u_long i;
-
-	for (i=0; i < MAXCPUS; i++) {
-		ci = cpu_info[i];
-		if (ci == NULL)
-			continue;
-		if (ci->ci_idle_pcb == NULL)
-			continue;
-		if ((ci->ci_flags & CPUF_PRESENT) == 0)
-			continue;
-		x86_64_init_pcb_tss_ldt(ci);
-	}
-}
-
-void
 cpu_start_secondary(struct cpu_info *ci)
 {
 	int i;
@@ -738,7 +759,6 @@ cpu_hatch(void *v)
 		panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
-	lcr0(ci->ci_idle_pcb->pcb_cr0);
 	cpu_init_idt();
 	lapic_set_lvt();
 	gdt_init_cpu(ci);
@@ -780,15 +800,14 @@ cpu_debug_dump(void)
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 
-	db_printf("addr		dev	id	flags	ipis	curproc		fpcurproc\n");
+	db_printf("addr		dev	id	flags	ipis	curproc\n");
 	CPU_INFO_FOREACH(cii, ci) {
-		db_printf("%p	%s	%u	%x	%x	%10p	%10p\n",
+		db_printf("%p	%s	%u	%x	%x	%10p\n",
 		    ci,
 		    ci->ci_dev == NULL ? "BOOT" : ci->ci_dev->dv_xname,
 		    ci->ci_cpuid,
 		    ci->ci_flags, ci->ci_ipis,
-		    ci->ci_curproc,
-		    ci->ci_fpcurproc);
+		    ci->ci_curproc);
 	}
 }
 #endif
Index: sys/arch/amd64/amd64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/db_interface.c,v
retrieving revision 1.29
diff -u -p -r1.29 db_interface.c
--- sys/arch/amd64/amd64/db_interface.c	19 Jul 2017 14:34:10 -0000	1.29
+++ sys/arch/amd64/amd64/db_interface.c	21 Jun 2018 11:54:01 -0000
@@ -66,8 +66,8 @@
 #endif
 
 extern label_t *db_recover;
-extern char *trap_type[];
-extern int trap_types;
+extern const char * const trap_type[];
+extern const int trap_types;
 
 #ifdef MULTIPROCESSOR
 struct mutex ddb_mp_mutex =
Index: sys/arch/amd64/amd64/fpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
retrieving revision 1.37
diff -u -p -r1.37 fpu.c
--- sys/arch/amd64/amd64/fpu.c	4 Oct 2017 02:10:33 -0000	1.37
+++ sys/arch/amd64/amd64/fpu.c	21 Jun 2018 11:54:01 -0000
@@ -53,35 +53,13 @@
 #include <machine/specialreg.h>
 #include <machine/fpu.h>
 
-#include <dev/isa/isavar.h>
-
-int	xrstor_user(struct savefpu *_addr, uint64_t _mask);
 void	trap(struct trapframe *);
 
 /*
- * We do lazy initialization and switching using the TS bit in cr0 and the
- * MDP_USEDFPU bit in mdproc.
- *
- * DNA exceptions are handled like this:
- *
- * 1) If there is no FPU, return and go to the emulator.
- * 2) If someone else has used the FPU, save its state into that process' PCB.
- * 3a) If MDP_USEDFPU is not set, set it and initialize the FPU.
- * 3b) Otherwise, reload the process' previous FPU state.
- *
- * When a process is created or exec()s, its saved cr0 image has the TS bit
- * set and the MDP_USEDFPU bit clear.  The MDP_USEDFPU bit is set when the
- * process first gets a DNA and the FPU is initialized.  The TS bit is turned
- * off when the FPU is used, and turned on again later when the process' FPU
- * state is saved.
- */
-
-/*
  * The mask of enabled XSAVE features.
  */
 uint64_t	xsave_mask;
 
-void fpudna(struct cpu_info *, struct trapframe *);
 static int x86fpflags_to_siginfo(u_int32_t);
 
 /*
@@ -101,7 +79,6 @@ uint32_t	fpu_mxcsr_mask;
 void
 fpuinit(struct cpu_info *ci)
 {
-	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
 	fninit();
 	if (fpu_mxcsr_mask == 0) {
 		struct fxsave64 fx __attribute__((aligned(16)));
@@ -113,7 +90,6 @@ fpuinit(struct cpu_info *ci)
 		else
 			fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
 	}
-	lcr0(rcr0() | (CR0_TS));
 }
 
 /*
@@ -126,23 +102,18 @@ fpuinit(struct cpu_info *ci)
 void
 fputrap(struct trapframe *frame)
 {
-	struct proc *p = curcpu()->ci_fpcurproc;
+	struct cpu_info *ci = curcpu();
+	struct proc *p = curproc;
 	struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
 	u_int32_t mxcsr, statbits;
 	u_int16_t cw;
 	int code;
 	union sigval sv;
 
-#ifdef DIAGNOSTIC
-	/*
-	 * At this point, fpcurproc should be curproc.  If it wasn't,
-	 * the TS bit should be set, and we should have gotten a DNA exception.
-	 */
-	if (p != curproc)
-		panic("fputrap: wrong proc");
-#endif
+	KASSERT(ci->ci_flags & CPUF_USERXSTATE);
+	ci->ci_flags &= ~CPUF_USERXSTATE;
+	fpusavereset(sfp);
 
-	fxsave(sfp);
 	if (frame->tf_trapno == T_XMM) {
 		mxcsr = sfp->fp_fxsave.fx_mxcsr;
 	  	statbits = mxcsr;
@@ -187,211 +158,21 @@ x86fpflags_to_siginfo(u_int32_t flags)
         return (FPE_FLTINV);
 }
 
-/*
- * Implement device not available (DNA) exception
- *
- * If we were the last process to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore our last
- * saved state.
- */
-void
-fpudna(struct cpu_info *ci, struct trapframe *frame)
-{
-	struct savefpu *sfp;
-	struct proc *p;
-	int s;
-
-	if (ci->ci_fpsaving) {
-		printf("recursive fpu trap; cr0=%x\n", rcr0());
-		return;
-	}
-
-	s = splipi();
-
-#ifdef MULTIPROCESSOR
-	p = ci->ci_curproc;
-#else
-	p = curproc;
-#endif
-
-	/*
-	 * Initialize the FPU state to clear any exceptions.  If someone else
-	 * was using the FPU, save their state.
-	 */
-	if (ci->ci_fpcurproc != NULL && ci->ci_fpcurproc != p) {
-		fpusave_cpu(ci, ci->ci_fpcurproc != &proc0);
-		uvmexp.fpswtch++;
-	}
-	splx(s);
-
-	if (p == NULL) {
-		clts();
-		return;
-	}
-
-	KDASSERT(ci->ci_fpcurproc == NULL);
-#ifndef MULTIPROCESSOR
-	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
-#else
-	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-		fpusave_proc(p, 1);
-#endif
-
-	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
-	clts();
-
-	s = splipi();
-	ci->ci_fpcurproc = p;
-	p->p_addr->u_pcb.pcb_fpcpu = ci;
-	splx(s);
-
-	sfp = &p->p_addr->u_pcb.pcb_savefpu;
-
-	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
-		fninit();
-		bzero(&sfp->fp_fxsave, sizeof(sfp->fp_fxsave));
-		sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
-		sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
-		fxrstor(&sfp->fp_fxsave);
-		p->p_md.md_flags |= MDP_USEDFPU;
-	} else {
-		if (xsave_mask) {
-			if (xrstor_user(sfp, xsave_mask)) {
-				fpusave_proc(p, 0);	/* faulted */
-				frame->tf_trapno = T_PROTFLT;
-				trap(frame);
-				return;
-			}
-		} else {
-			static double	zero = 0.0;
-
-			/*
-			 * amd fpu does not restore fip, fdp, fop on fxrstor
-			 * thus leaking other process's execution history.
-			 */
-			fnclex();
-			__asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
-			fxrstor(sfp);
-		}
-	}
-}
-
-
-void
-fpusave_cpu(struct cpu_info *ci, int save)
-{
-	struct proc *p;
-	int s;
-
-	KDASSERT(ci == curcpu());
-
-	p = ci->ci_fpcurproc;
-	if (p == NULL)
-		return;
-
-	if (save) {
-#ifdef DIAGNOSTIC
-		if (ci->ci_fpsaving != 0)
-			panic("fpusave_cpu: recursive save!");
-#endif
-		/*
-		 * Set ci->ci_fpsaving, so that any pending exception will be
-		 * thrown away.  (It will be caught again if/when the FPU
-		 * state is restored.)
-		 */
-		clts();
-		ci->ci_fpsaving = 1;
-		if (xsave_mask)
-			xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
-		else
-			fxsave(&p->p_addr->u_pcb.pcb_savefpu);
-		ci->ci_fpsaving = 0;
-	}
-
-	stts();
-	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
-
-	s = splipi();
-	p->p_addr->u_pcb.pcb_fpcpu = NULL;
-	ci->ci_fpcurproc = NULL;
-	splx(s);
-}
-
-/*
- * Save p's FPU state, which may be on this processor or another processor.
- */
-void
-fpusave_proc(struct proc *p, int save)
-{
-	struct cpu_info *ci = curcpu();
-	struct cpu_info *oci;
-
-	KDASSERT(p->p_addr != NULL);
-
-	oci = p->p_addr->u_pcb.pcb_fpcpu;
-	if (oci == NULL)
-		return;
-
-#if defined(MULTIPROCESSOR)
-	if (oci == ci) {
-		int s = splipi();
-		fpusave_cpu(ci, save);
-		splx(s);
-	} else {
-		oci->ci_fpsaveproc = p;
-		x86_send_ipi(oci,
-	    	    save ? X86_IPI_SYNCH_FPU : X86_IPI_FLUSH_FPU);
-		while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-			CPU_BUSY_CYCLE();
-	}
-#else
-	KASSERT(ci->ci_fpcurproc == p);
-	fpusave_cpu(ci, save);
-#endif
-}
-
 void
 fpu_kernel_enter(void)
 {
-	struct cpu_info	*ci = curcpu();
-	uint32_t	 cw;
-	int		 s;
-
-	/*
-	 * Fast path.  If the kernel was using the FPU before, there
-	 * is no work to do besides clearing TS.
-	 */
-	if (ci->ci_fpcurproc == &proc0) {
-		clts();
-		return;
-	}
-
-	s = splipi();
+	struct cpu_info *ci = curcpu();
 
-	if (ci->ci_fpcurproc != NULL) {
-		fpusave_cpu(ci, 1);
-		uvmexp.fpswtch++;
+	/* save curproc's FPU state if we haven't already */
+	if (ci->ci_flags & CPUF_USERXSTATE) {
+		ci->ci_flags &= ~CPUF_USERXSTATE;
+		fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
 	}
-
-	/* Claim the FPU */
-	ci->ci_fpcurproc = &proc0;
-
-	splx(s);
-
-	/* Disable DNA exceptions */
-	clts();
-
-	/* Initialize the FPU */
-	fninit();
-	cw = __INITIAL_NPXCW__;
-	fldcw(&cw);
-	cw = __INITIAL_MXCSR__;
-	ldmxcsr(&cw);
 }
 
 void
 fpu_kernel_exit(void)
 {
-	/* Enable DNA exceptions */
-	stts();
+	/* make sure we don't leave anything in the registers */
+	fpureset();
 }
Index: sys/arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.31.8.1
diff -u -p -r1.31.8.1 genassym.cf
--- sys/arch/amd64/amd64/genassym.cf	26 Feb 2018 12:29:48 -0000	1.31.8.1
+++ sys/arch/amd64/amd64/genassym.cf	21 Jun 2018 11:54:01 -0000
@@ -94,9 +94,8 @@ member	pcb_rbp
 member	pcb_kstack
 member	pcb_fsbase
 member	pcb_onfault
-member	pcb_fpcpu
 member	pcb_pmap
-member	pcb_cr0
+member	pcb_savefpu
 
 struct pmap
 member	pm_cpus
@@ -131,7 +130,8 @@ member	CPU_INFO_USER_CR3	ci_user_cr3
 member	CPU_INFO_KERN_RSP	ci_kern_rsp
 member	CPU_INFO_INTR_RSP	ci_intr_rsp
 
-export	CPUF_USERSEGS_BIT
+export	CPUF_USERSEGS
+export	CPUF_USERXSTATE
 
 struct	intrsource
 member	is_recurse
Index: sys/arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.87.2.1
diff -u -p -r1.87.2.1 identcpu.c
--- sys/arch/amd64/amd64/identcpu.c	26 Feb 2018 12:29:48 -0000	1.87.2.1
+++ sys/arch/amd64/amd64/identcpu.c	21 Jun 2018 11:54:01 -0000
@@ -217,6 +217,11 @@ const struct {
 	{ CPUIDEDX_ITSC,	"ITSC" },
 }, cpu_amdspec_ebxfeatures[] = {
 	{ CPUIDEBX_IBPB,	"IBPB" },
+}, cpu_xsave_extfeatures[] = {
+	{ XSAVE_XSAVEOPT,	"XSAVEOPT" },
+	{ XSAVE_XSAVEC,		"XSAVEC" },
+	{ XSAVE_XGETBV1,	"XGETBV1" },
+	{ XSAVE_XSAVES,		"XSAVES" },
 };
 
 int
@@ -651,6 +656,14 @@ identifycpu(struct cpu_info *ci)
 					printf(",%s",
 					    cpu_amdspec_ebxfeatures[i].str);
 		}
+	}
+
+	/* xsave subfeatures */
+	if (cpuid_level >= 0xd) {
+		CPUID_LEAF(0xd, 1, val, dummy, dummy, dummy);
+		for (i = 0; i < nitems(cpu_xsave_extfeatures); i++)
+			if (val & cpu_xsave_extfeatures[i].bit)
+				printf(",%s", cpu_xsave_extfeatures[i].str);
 	}
 
 	if (cpu_meltdown)
Index: sys/arch/amd64/amd64/ipifuncs.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v
retrieving revision 1.28
diff -u -p -r1.28 ipifuncs.c
--- sys/arch/amd64/amd64/ipifuncs.c	23 Nov 2015 22:57:12 -0000	1.28
+++ sys/arch/amd64/amd64/ipifuncs.c	21 Jun 2018 11:54:01 -0000
@@ -62,9 +62,6 @@
 void x86_64_ipi_nop(struct cpu_info *);
 void x86_64_ipi_halt(struct cpu_info *);
 
-void x86_64_ipi_synch_fpu(struct cpu_info *);
-void x86_64_ipi_flush_fpu(struct cpu_info *);
-
 #if NVMM > 0
 void x86_64_ipi_start_vmm(struct cpu_info *);
 void x86_64_ipi_stop_vmm(struct cpu_info *);
@@ -85,8 +82,8 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf
 {
 	x86_64_ipi_halt,
 	x86_64_ipi_nop,
-	x86_64_ipi_flush_fpu,
-	x86_64_ipi_synch_fpu,
+	NULL,
+	NULL,
 	NULL,
 	x86_64_ipi_reload_mtrr,
 	x86_setperf_ipi,
@@ -115,7 +112,6 @@ x86_64_ipi_halt(struct cpu_info *ci)
 	SCHED_ASSERT_UNLOCKED();
 	KASSERT(!__mp_lock_held(&kernel_lock));
 	
-	fpusave_cpu(ci, 1);
 	disable_intr();
 	lapic_disable();
 	wbinvd();
@@ -125,20 +121,6 @@ x86_64_ipi_halt(struct cpu_info *ci)
 	for(;;) {
 		__asm volatile("hlt");
 	}
-}
-
-void
-x86_64_ipi_flush_fpu(struct cpu_info *ci)
-{
-	if (ci->ci_fpsaveproc == ci->ci_fpcurproc)
-		fpusave_cpu(ci, 0);
-}
-
-void
-x86_64_ipi_synch_fpu(struct cpu_info *ci)
-{
-	if (ci->ci_fpsaveproc == ci->ci_fpcurproc)
-		fpusave_cpu(ci, 1);
 }
 
 #ifdef MTRR
Index: sys/arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.89.2.1
diff -u -p -r1.89.2.1 locore.S
--- sys/arch/amd64/amd64/locore.S	26 Feb 2018 12:29:48 -0000	1.89.2.1
+++ sys/arch/amd64/amd64/locore.S	21 Jun 2018 11:54:01 -0000
@@ -113,10 +113,11 @@
 #include <sys/syscall.h>
 
 #include <machine/param.h>
+#include <machine/codepatch.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
-#include <machine/trap.h>
+#include <machine/trap.h>			/* T_PROTFLT */
 #include <machine/frameasm.h>
 
 #if NLAPIC > 0
@@ -345,7 +346,12 @@ ENTRY(cpu_switchto)
 	movb	$SONPROC,P_STAT(%r12)	# p->p_stat = SONPROC
 	SET_CURPROC(%r12,%rcx)
 
-	movl	CPUVAR(CPUID),%edi
+	movl	CPUVAR(CPUID),%r9d
+
+	/* for the FPU/"extended CPU state" handling below */
+	movq	xsave_mask(%rip),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
 
 	/* If old proc exited, don't bother. */
 	testq	%r13,%r13
@@ -358,7 +364,7 @@ ENTRY(cpu_switchto)
 	 *   %rax, %rcx - scratch
 	 *   %r13 - old proc, then old pcb
 	 *   %r12 - new proc
-	 *   %edi - cpuid
+	 *   %r9d - cpuid
 	 */
 
 	movq	P_ADDR(%r13),%r13
@@ -366,16 +372,46 @@ ENTRY(cpu_switchto)
 	/* clear the old pmap's bit for the cpu */
 	movq	PCB_PMAP(%r13),%rcx
 	lock
-	btrq	%rdi,PM_CPUS(%rcx)
+	btrq	%r9,PM_CPUS(%rcx)
 
 	/* Save stack pointers. */
 	movq	%rsp,PCB_RSP(%r13)
 	movq	%rbp,PCB_RBP(%r13)
 
+	/*
+	 * If the old proc ran in userspace then save the
+	 * floating-point/"extended state" registers
+	 */
+	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
+	jz	.Lxstate_reset
+
+	movq	%r13, %rdi
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	CODEPATCH_START
+	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
+	CODEPATCH_END(CPTAG_XSAVE)
+
 switch_exited:
-	/* did old proc run in userspace?  then reset the segment regs */
-	btrl	$CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
-	jnc	restore_saved
+	/* now clear the xstate */
+	movq	proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	CODEPATCH_START
+	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
+	CODEPATCH_END(CPTAG_XRSTOR)
+	andl	$~CPUF_USERXSTATE,CPUVAR(FLAGS)
+
+.Lxstate_reset:
+	/*
+	 * If the segment registers haven't been reset since the old proc
+	 * ran in userspace then reset them now
+	 */
+	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
+	jz	restore_saved
+	andl	$~CPUF_USERSEGS,CPUVAR(FLAGS)
 
 	/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
 	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),%ax
@@ -432,32 +468,17 @@ restore_saved:
 0:
 
 	/* set the new pmap's bit for the cpu */
-	movl	CPUVAR(CPUID),%edi
 	lock
-	btsq	%rdi,PM_CPUS(%rcx)
+	btsq	%r9,PM_CPUS(%rcx)
 #ifdef DIAGNOSTIC
 	jc	_C_LABEL(switch_pmcpu_set)
 #endif
 
 switch_restored:
-	/* Restore cr0 (including FPU state). */
-	movl	PCB_CR0(%r13),%ecx
-#ifdef MULTIPROCESSOR
-	movq	PCB_FPCPU(%r13),%r8
-	cmpq	CPUVAR(SELF),%r8
-	jz	1f
-	orl	$CR0_TS,%ecx
-1:
-#endif
-	movq	%rcx,%cr0
-
 	SET_CURPCB(%r13)
 
 	/* Interrupts are okay again. */
 	sti
-
-switch_return:
-
 	popq	%r15
 	popq	%r14
 	popq	%r13
@@ -497,7 +518,7 @@ ENTRY(cpu_idle_leave)
 
 #ifdef DIAGNOSTIC
 NENTRY(switch_pmcpu_set)
-	movabsq	$switch_active,%rdi
+	leaq	switch_active(%rip),%rdi
 	call	_C_LABEL(panic)
 	/* NOTREACHED */
 
@@ -529,7 +550,7 @@ IDTVEC(syscall)
 	 * %rip and the original rflags has been copied to %r11.  %cs and
 	 * %ss have been updated to the kernel segments, but %rsp is still
 	 * the user-space value.
-	 * First order of business is to swap to the kernel gs.base so that
+	 * First order of business is to swap to the kernel GS.base so that
 	 * we can access our struct cpu_info and use the scratch space there
 	 * to switch to the kernel page tables (thank you, Intel), then
 	 * switch to our kernel stack.  Once that's in place we can
@@ -563,7 +584,7 @@ NENTRY(Xsyscall_untramp)
 	movq	%r11, TF_RFLAGS(%rsp)	/* old rflags from syscall insn */
 	movq	$(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
 	movq	%rcx,TF_RIP(%rsp)
-	movq	$2,TF_ERR(%rsp)		/* ignored */
+	movq	%rax,TF_ERR(%rsp)	/* stash syscall # for SPL check */
 
 	movq	CPUVAR(CURPROC),%r14
 	movq	%rsp,P_MD_REGS(%r14)	# save pointer to frame
@@ -590,8 +611,17 @@ NENTRY(Xsyscall_untramp)
 
 	/* Could registers have been changed that require an iretq? */
 	testl	$MDP_IRET, P_MD_FLAGS(%r14)
-	jne	intr_fast_exit
+	jne	intr_user_exit_post_ast
+
+	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
+	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
+	jz	.Lsyscall_restore_xstate
+
+	/* Restore FS.base if it's not already in the CPU */
+	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
+	jz	.Lsyscall_restore_fsbase
 
+.Lsyscall_restore_registers:
 	movq	TF_RDI(%rsp),%rdi
 	movq	TF_RSI(%rsp),%rsi
 	movq	TF_R8(%rsp),%r8
@@ -604,17 +634,6 @@ NENTRY(Xsyscall_untramp)
 	movq	TF_RBP(%rsp),%rbp
 	movq	TF_RBX(%rsp),%rbx
 
-	/* Restore FS.base if it's not already in the CPU */
-	btsl	$CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
-	jc	99f
-	movq	CPUVAR(CURPCB),%rdx
-	movq	PCB_FSBASE(%rdx),%rax
-	movq	%rax,%rdx
-	shrq	$32,%rdx
-	movl	$MSR_FSBASE,%ecx
-	wrmsr
-99:
-
 	/*
 	 * We need to finish reading from the trapframe, then switch
 	 * to the user page tables, swapgs, and return.  We need
@@ -642,11 +661,42 @@ KUENTRY(syscall_trampback)
 	sysretq
 
 	.text
+	.align	16,0xcc
+	/* in this case, need FS.base but not xstate, rarely happens */
+.Lsyscall_restore_fsbase:	/* CPU doesn't have curproc's FS.base */
+	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
+	movq	CPUVAR(CURPCB),%rdi
+	jmp	.Lsyscall_restore_fsbase_real
+
+	.align	16,0xcc
+.Lsyscall_restore_xstate:	/* CPU doesn't have curproc's xstate */
+	orl	$(CPUF_USERXSTATE|CPUF_USERSEGS),CPUVAR(FLAGS)
+	movq	CPUVAR(CURPCB),%rdi
+	movq	xsave_mask(%rip),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	/* untouched state so can't fault */
+	CODEPATCH_START
+	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
+	CODEPATCH_END(CPTAG_XRSTOR)
+#if PCB_SAVEFPU != 0
+	subq	$PCB_SAVEFPU,%rdi
+#endif
+.Lsyscall_restore_fsbase_real:
+	movq	PCB_FSBASE(%rdi),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	movl	$MSR_FSBASE,%ecx
+	wrmsr
+	jmp	.Lsyscall_restore_registers
 
 #ifdef DIAGNOSTIC
 .Lsyscall_spl_not_lowered:
-	movabsq	$spl_lowered, %rdi
-	movl	TF_RAX(%rsp),%esi
+	leaq	spl_lowered(%rip), %rdi
+	movl	TF_ERR(%rsp),%esi	/* syscall # stashed above */
 	movl	TF_RDI(%rsp),%edx
 	movl	%ebx,%ecx
 	movl	CPUVAR(ILEVEL),%r8d
@@ -676,15 +726,54 @@ NENTRY(proc_trampoline)
 
 
 /*
- * Return via iretq, for real interrupts and signal returns
+ * Returning to userspace via iretq.  We do things in this order:
+ *  - check for ASTs
+ *  - restore FPU/"extended CPU state" if it's not already in the CPU
+ *  - DIAGNOSTIC: no more C calls after this, so check the SPL
+ *  - restore FS.base if it's not already in the CPU
+ *  - restore most registers
+ *  - update the iret frame from the trapframe
+ *  - finish reading from the trapframe
+ *  - switch to the trampoline stack	\
+ *  - jump to the .kutext segment	|-- Meltdown workaround
+ *  - switch to the user page tables	/
+ *  - swapgs
+ *  - iretq
  */
-NENTRY(intr_fast_exit)
+NENTRY(intr_user_exit)
 #ifdef DIAGNOSTIC
 	pushfq
 	popq	%rdx
 	testq	$PSL_I,%rdx
-	jnz	.Lintr_exit_not_blocked
+	jnz	.Lintr_user_exit_not_blocked
+#endif /* DIAGNOSTIC */
+
+	/* Check for ASTs */
+	CHECK_ASTPENDING(%r11)
+	je	intr_user_exit_post_ast
+	CLEAR_ASTPENDING(%r11)
+	sti
+	movq	%rsp,%rdi
+	call	_C_LABEL(ast)
+	cli
+	jmp	intr_user_exit
+
+intr_user_exit_post_ast:
+	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
+	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
+	jz	.Lintr_restore_xstate
+
+#ifdef DIAGNOSTIC
+	/* no more C calls after this, so check the SPL */
+	cmpl	$0,CPUVAR(ILEVEL)
+	jne	.Luser_spl_not_lowered
 #endif /* DIAGNOSTIC */
+
+	/* Restore FS.base if it's not already in the CPU */
+	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
+	jz	.Lintr_restore_fsbase
+
+.Lintr_restore_registers:
 	movq	TF_RDI(%rsp),%rdi
 	movq	TF_RSI(%rsp),%rsi
 	movq	TF_R8(%rsp),%r8
@@ -697,30 +786,7 @@ NENTRY(intr_fast_exit)
 	movq	TF_RBP(%rsp),%rbp
 	movq	TF_RBX(%rsp),%rbx
 
-	testq	$SEL_RPL,TF_CS(%rsp)
-	je	intr_exit_recurse		/* returning back to kernel? */
-
-	/* returning to userspace.  XXX fix up iret frame here */
-
-	/* restore FS.base if it's not already in the CPU */
-	btsl	$CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
-	jc	99f
-	movq	CPUVAR(CURPCB),%rdx		/* for below */
-	movq	PCB_FSBASE(%rdx),%rax
-	movq	%rax,%rdx
-	shrq	$32,%rdx
-	movl	$MSR_FSBASE,%ecx
-	wrmsr
-99:
 	/*
-	 * Returning to userspace.  We need to go things in this order:
-	 *  - update the iret frame from the trapframe
-	 *  - finish reading from the trapframe
-	 *  - switch to the trampoline stack
-	 *  - jump to the .kutext segment
-	 *  - switch to the user page tables
-	 *  - swapgs
-	 *  - iretq
 	 * To get the final value for the register that was used
 	 * for the mov to %cr3, we need access to somewhere accessible
 	 * on the user page tables, so we save it in CPUVAR(SCRATCH)
@@ -758,7 +824,101 @@ KUENTRY(iretq_tramp)
 _C_LABEL(doreti_iret):
 	iretq
 
-NENTRY(intr_exit_recurse)
+	.text
+	.align	16,0xcc
+.Lintr_restore_xstate:		/* CPU doesn't have curproc's xstate */
+	orl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
+	movq	CPUVAR(CURPCB),%rdi
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	movq	xsave_mask(%rip),%rsi
+	call	xrstor_user
+	testl	%eax,%eax
+	jnz	.Lintr_xrstor_faulted
+.Lintr_restore_fsbase:		/* CPU doesn't have curproc's FS.base */
+	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
+	movq	CPUVAR(CURPCB),%rdx
+	movq	PCB_FSBASE(%rdx),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	movl	$MSR_FSBASE,%ecx
+	wrmsr
+	jmp	.Lintr_restore_registers
+
+.Lintr_xrstor_faulted:
+	/*
+	 * xrstor faulted; we need to reset the FPU state and call trap()
+	 * to post a signal, which requires interrupts be enabled.
+	 */
+	sti
+	movq	proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	CODEPATCH_START
+	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
+	CODEPATCH_END(CPTAG_XRSTOR)
+	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
+	jmp	recall_trap
+
+#ifdef DIAGNOSTIC
+.Lintr_user_exit_not_blocked:
+	movl	warn_once(%rip),%edi
+	testl	%edi,%edi
+	jnz	1f
+	incl	%edi
+	movl	%edi,warn_once(%rip)
+	leaq	.Lnot_blocked(%rip),%rdi
+	call	_C_LABEL(printf)
+#ifdef DDB
+	int	$3
+#endif /* DDB */
+1:	cli
+	jmp	intr_user_exit
+
+.Luser_spl_not_lowered:
+	sti
+	leaq	intr_spl_lowered(%rip),%rdi
+	movl	CPUVAR(ILEVEL),%esi
+	xorl	%edx,%edx		/* always SPL zero for userspace */
+	xorl	%eax,%eax
+	call	_C_LABEL(printf)
+#ifdef DDB
+	int	$3
+#endif /* DDB */
+	movl	$0,CPUVAR(ILEVEL)
+	cli
+	jmp	intr_user_exit
+
+	.section .rodata
+intr_spl_lowered:
+	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
+	.text
+#endif /* DIAGNOSTIC */
+
+
+/*
+ * Return to supervisor mode from trap or interrupt
+ */
+NENTRY(intr_fast_exit)
+#ifdef DIAGNOSTIC
+	pushfq
+	popq	%rdx
+	testq	$PSL_I,%rdx
+	jnz	.Lintr_exit_not_blocked
+#endif /* DIAGNOSTIC */
+	movq	TF_RDI(%rsp),%rdi
+	movq	TF_RSI(%rsp),%rsi
+	movq	TF_R8(%rsp),%r8
+	movq	TF_R9(%rsp),%r9
+	movq	TF_R10(%rsp),%r10
+	movq	TF_R12(%rsp),%r12
+	movq	TF_R13(%rsp),%r13
+	movq	TF_R14(%rsp),%r14
+	movq	TF_R15(%rsp),%r15
+	movq	TF_RBP(%rsp),%rbp
+	movq	TF_RBX(%rsp),%rbx
 	movq	TF_RDX(%rsp),%rdx
 	movq	TF_RCX(%rsp),%rcx
 	movq	TF_R11(%rsp),%r11
@@ -813,7 +973,6 @@ NENTRY(intr_exit_recurse)
 
 #ifdef DIAGNOSTIC
 .Lintr_exit_not_blocked:
-	xchgw	%bx, %bx
 	movl	warn_once(%rip),%edi
 	testl	%edi,%edi
 	jnz	1f
@@ -837,18 +996,71 @@ warn_once:
 	.text
 #endif
 
+/*
+ * FPU/"extended CPU state" handling
+ * 	int xrstor_user(sfp, mask)
+ *		load given state, returns 0/1 if okay/it trapped
+ *	void fpusave(sfp)
+ *		save current state, but retain it in the FPU
+ *	void fpusavereset(sfp)
+ *		save current state and reset FPU to initial/kernel state
+ */
+
 ENTRY(xrstor_user)
 	movq	%rsi, %rdx
 	movl	%esi, %eax
 	shrq	$32, %rdx
 	.globl	xrstor_fault
 xrstor_fault:
-	xrstor	(%rdi)
+	CODEPATCH_START
+	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
+	CODEPATCH_END(CPTAG_XRSTOR)
 	xorl	%eax, %eax
 	ret
-ENTRY(xrstor_resume)
+NENTRY(xrstor_resume)
 	movl	$1, %eax
 	ret
+END(xrstor_user)
+
+ENTRY(fpusave)
+	movq	xsave_mask(%rip),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	CODEPATCH_START
+	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
+	CODEPATCH_END(CPTAG_XSAVE)
+	ret
+END(fpusave)
+
+ENTRY(fpusavereset)
+	movq	xsave_mask(%rip),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	CODEPATCH_START
+	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
+	CODEPATCH_END(CPTAG_XSAVE)
+	movq	proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+	addq	$PCB_SAVEFPU,%rdi
+#endif
+	CODEPATCH_START
+	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
+	CODEPATCH_END(CPTAG_XRSTOR)
+	ret
+END(fpusavereset)
+
+	.section .rodata
+	.globl	_C_LABEL(_xrstor)
+_C_LABEL(_xrstor):
+	.byte 0x48; xrstor	(%rdi)		/* really xrstor64 */
+
+	.globl	_C_LABEL(_xsave)
+_C_LABEL(_xsave):
+	.byte 0x48; xsave	(%rdi)		/* really xsave64 */
+
+	.globl	_C_LABEL(_xsaveopt)
+_C_LABEL(_xsaveopt):
+	.byte 0x48; xsaveopt	(%rdi)		/* really xsaveopt64 */
 
 ENTRY(pagezero)
 	movq    $-PAGE_SIZE,%rdx
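
(Context, not part of the signed patch: a hedged sketch of how C code
is expected to use the helpers documented in the comment block above.
fpusave(), fpusavereset() and xrstor_user() are routines this patch
adds; "p" is an assumed struct proc pointer, and the surrounding logic
only mirrors the vm_machdep.c, machdep.c and trap-return hunks
elsewhere in this errata.)

    struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;

    /* cpu_fork(): snapshot the parent's state, keep it live in the FPU */
    fpusave(sfp);

    /* sendsig()/fpu_kernel_enter(): save state, scrub the registers */
    fpusavereset(sfp);

    /* return to userspace: reload; a nonzero return means the saved
     * image was invalid and must be converted into a signal */
    if (xrstor_user(sfp, xsave_mask))
            fpureset();     /* then post T_PROTFLT via trap() */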
Index: sys/arch/amd64/amd64/locore0.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore0.S,v
retrieving revision 1.2.2.1
diff -u -p -r1.2.2.1 locore0.S
--- sys/arch/amd64/amd64/locore0.S	26 Feb 2018 12:29:48 -0000	1.2.2.1
+++ sys/arch/amd64/amd64/locore0.S	21 Jun 2018 11:54:01 -0000
@@ -601,7 +601,7 @@ write_efer:	
 	 * 4. Enable paging and the rest of it.
 	 */
 	movl	%cr0,%eax
-	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%eax
+	orl	$CR0_DEFAULT,%eax
 	movl	%eax,%cr0
 	jmp	compat
 compat:
Index: sys/arch/amd64/amd64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.231.2.1
diff -u -p -r1.231.2.1 machdep.c
--- sys/arch/amd64/amd64/machdep.c	26 Feb 2018 12:29:48 -0000	1.231.2.1
+++ sys/arch/amd64/amd64/machdep.c	21 Jun 2018 11:54:01 -0000
@@ -395,7 +395,6 @@ x86_64_proc0_tss_ldt_init(void)
 	struct pcb *pcb;
 
 	cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb;
-	pcb->pcb_cr0 = rcr0();
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_kstack = (u_int64_t)proc0.p_addr + USPACE - 16;
 	proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1;
@@ -404,20 +403,6 @@ x86_64_proc0_tss_ldt_init(void)
 	lldt(0);
 }
 
-/*       
- * Set up TSS for a new PCB.
- */         
-         
-#ifdef MULTIPROCESSOR
-void    
-x86_64_init_pcb_tss_ldt(struct cpu_info *ci)   
-{
-	struct pcb *pcb = ci->ci_idle_pcb;
- 
-	pcb->pcb_cr0 = rcr0();
-}
-#endif	/* MULTIPROCESSOR */
-
 bios_diskinfo_t *
 bios_getdiskinfo(dev_t dev)
 {
@@ -579,6 +564,7 @@ sendsig(sig_t catcher, int sig, int mask
 	struct trapframe *tf = p->p_md.md_regs;
 	struct sigacts *psp = p->p_p->ps_sigacts;
 	struct sigcontext ksc;
+	struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
 	siginfo_t ksi;
 	register_t sp, scp, sip;
 	u_long sss;
@@ -597,17 +583,19 @@ sendsig(sig_t catcher, int sig, int mask
 	sp &= ~15ULL;	/* just in case */
 	sss = (sizeof(ksc) + 15) & ~15;
 
-	if (p->p_md.md_flags & MDP_USEDFPU) {
-		fpusave_proc(p, 1);
-		sp -= fpu_save_len;
-		ksc.sc_fpstate = (struct fxsave64 *)sp;
-		if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave,
-		    (void *)sp, fpu_save_len))
-			sigexit(p, SIGILL);
+	/* Save FPU state to PCB if necessary, then copy it out */
+	if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+		curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+		fpusavereset(&p->p_addr->u_pcb.pcb_savefpu);
+	}
+	sp -= fpu_save_len;
+	ksc.sc_fpstate = (struct fxsave64 *)sp;
+	if (copyout(sfp, (void *)sp, fpu_save_len))
+		sigexit(p, SIGILL);
 
-		/* Signal handlers get a completely clean FP state */
-		p->p_md.md_flags &= ~MDP_USEDFPU;
-	}
+	/* Now reset the FPU state in PCB */
+	memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+	    &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
 
 	sip = 0;
 	if (psp->ps_siginfo & sigmask(sig)) {
@@ -637,6 +625,9 @@ sendsig(sig_t catcher, int sig, int mask
 	tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
 	tf->tf_rsp = scp;
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+
+	/* The reset state _is_ the userspace state for this thread now */
+	curcpu()->ci_flags |= CPUF_USERXSTATE;
 }
 
 /*
@@ -681,16 +672,23 @@ sys_sigreturn(struct proc *p, void *v, r
 	    !USERMODE(ksc.sc_cs, ksc.sc_eflags))
 		return (EINVAL);
 
-	if (p->p_md.md_flags & MDP_USEDFPU)
-		fpusave_proc(p, 0);
+	/* Current state is obsolete; toss it and force a reload */
+	if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+		curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+		fpureset();
+	}
 
-	if (ksc.sc_fpstate) {
+	/* Copy in the FPU state to restore */
+	if (__predict_true(ksc.sc_fpstate != NULL)) {
 		struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
 
 		if ((error = copyin(ksc.sc_fpstate, fx, fpu_save_len)))
 			return (error);
 		fx->fx_mxcsr &= fpu_mxcsr_mask;
-		p->p_md.md_flags |= MDP_USEDFPU;
+	} else {
+		/* shouldn't happen, but handle it */
+		memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+		    &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
 	}
 
 	ksc.sc_trapno = tf->tf_trapno;
@@ -707,6 +705,7 @@ sys_sigreturn(struct proc *p, void *v, r
 	 * when a signal was being delivered, the process will be
 	 * completely restored, including the userland %rcx and %r11
 	 * registers which the 'sysretq' instruction cannot restore.
+	 * Also need to make sure we can handle faulting on xrstor.
 	 */
 	p->p_md.md_flags |= MDP_IRET;
 
@@ -1092,10 +1091,19 @@ setregs(struct proc *p, struct exec_pack
 {
 	struct trapframe *tf;
 
-	/* If we were using the FPU, forget about it. */
-	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-		fpusave_proc(p, 0);
-	p->p_md.md_flags &= ~MDP_USEDFPU;
+	/* Reset FPU state in PCB */
+	memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+	    &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
+
+	if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+		/* state in CPU is obsolete; reset it */
+		fpureset();
+	} else {
+		/* the reset state _is_ the userspace state now */
+		curcpu()->ci_flags |= CPUF_USERXSTATE;
+	}
+
+	/* To reset all registers we have to return via iretq */
 	p->p_md.md_flags |= MDP_IRET;
 
 	reset_segs();
Index: sys/arch/amd64/amd64/mptramp.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/mptramp.S,v
retrieving revision 1.15
diff -u -p -r1.15 mptramp.S
--- sys/arch/amd64/amd64/mptramp.S	29 Jun 2017 08:14:36 -0000	1.15
+++ sys/arch/amd64/amd64/mptramp.S	21 Jun 2018 11:54:01 -0000
@@ -120,7 +120,7 @@ _C_LABEL(cpu_spinup_trampoline):
 	movw    %ax, %ss
 	addr32 lgdtl (.Lmptramp_gdt32_desc)   # load flat descriptor table
 	movl    %cr0, %eax       # get cr0
-	orl     $0x1, %eax      # enable protected mode
+	orl	$CR0_PE, %eax	# enable protected mode
 	movl    %eax, %cr0      # doit
 	ljmpl	$0x8, $.Lmp_startup
 
@@ -179,7 +179,7 @@ _TRMP_LABEL(.Lmp_startup)
 	movl	$.Lmptramp_jmp64,%eax
 
 	movl    %cr0,%ecx               # get control word
-	orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%ecx
+	orl	$CR0_DEFAULT,%ecx
 	movl	%ecx, %cr0
 
 	ljmp	*(%eax)
@@ -230,7 +230,7 @@ _C_LABEL(cpu_spinup_trampoline_end):	#en
 	/* Switch address space. */
 	movq	PCB_CR3(%rsi),%rax
 	movq	%rax,%cr3
-	movl    PCB_CR0(%rsi),%eax
+	movl	$CR0_DEFAULT,%eax
 	movq    %rax,%cr0
 	call	_C_LABEL(cpu_hatch)
 	/* NOTREACHED */
Index: sys/arch/amd64/amd64/process_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
retrieving revision 1.14
diff -u -p -r1.14 process_machdep.c
--- sys/arch/amd64/amd64/process_machdep.c	28 Jun 2015 18:54:54 -0000	1.14
+++ sys/arch/amd64/amd64/process_machdep.c	21 Jun 2018 11:54:01 -0000
@@ -127,19 +127,6 @@ process_read_fpregs(struct proc *p, stru
 {
 	struct fxsave64 *frame = process_fpframe(p);
 
-	if (p->p_md.md_flags & MDP_USEDFPU) {
-		fpusave_proc(p, 1);
-	} else {
-		/* Fake a FNINIT. */
-		memset(frame, 0, sizeof(*regs));
-		frame->fx_fcw = __INITIAL_NPXCW__;
-		frame->fx_fsw = 0x0000;
-		frame->fx_ftw = 0x00;
-		frame->fx_mxcsr = __INITIAL_MXCSR__;
-		frame->fx_mxcsr_mask = fpu_mxcsr_mask;
-		p->p_md.md_flags |= MDP_USEDFPU;
-	}
-
 	memcpy(&regs->fxstate, frame, sizeof(*regs));
 	return (0);
 }
@@ -189,14 +176,11 @@ process_write_fpregs(struct proc *p, str
 {
 	struct fxsave64 *frame = process_fpframe(p);
 
-	if (p->p_md.md_flags & MDP_USEDFPU) {
-		fpusave_proc(p, 0);
-	} else {
-		p->p_md.md_flags |= MDP_USEDFPU;
-	}
-
 	memcpy(frame, &regs->fxstate, sizeof(*regs));
 	frame->fx_mxcsr &= fpu_mxcsr_mask;
+
+	/* force target to return via iretq so bogus xstate can be handled */
+	p->p_md.md_flags |= MDP_IRET;
 	return (0);
 }
 
Index: sys/arch/amd64/amd64/spl.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/spl.S,v
retrieving revision 1.11.4.1
diff -u -p -r1.11.4.1 spl.S
--- sys/arch/amd64/amd64/spl.S	26 Feb 2018 12:29:48 -0000	1.11.4.1
+++ sys/arch/amd64/amd64/spl.S	21 Jun 2018 11:54:01 -0000
@@ -158,18 +158,6 @@ KIDTVEC(doreti)
 	jmp	*IS_RESUME(%rax)
 2:	/* Check for ASTs on exit to user mode. */
 	movl	%ebx,CPUVAR(ILEVEL)
-5:	CHECK_ASTPENDING(%r11)
-	je	3f
-	testb   $SEL_RPL,TF_CS(%rsp)
-	jz	3f
-4:	CLEAR_ASTPENDING(%r11)
-	sti
-	movq	%rsp, %rdi
-	call	_C_LABEL(ast)
-	cli
-	jmp	5b
-3: 	
-#ifdef DIAGNOSTIC
-	movl	$254,%esi
-#endif /* DIAGNOSTIC */
+	testb	$SEL_RPL,TF_CS(%rsp)
+	jnz	intr_user_exit
 	INTRFASTEXIT
Index: sys/arch/amd64/amd64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.61.2.1
diff -u -p -r1.61.2.1 trap.c
--- sys/arch/amd64/amd64/trap.c	26 Feb 2018 12:29:48 -0000	1.61.2.1
+++ sys/arch/amd64/amd64/trap.c	21 Jun 2018 11:54:01 -0000
@@ -97,7 +97,7 @@ void trap(struct trapframe *);
 void ast(struct trapframe *);
 void syscall(struct trapframe *);
 
-const char *trap_type[] = {
+const char * const trap_type[] = {
 	"privileged instruction fault",		/*  0 T_PRIVINFLT */
 	"breakpoint trap",			/*  1 T_BPTFLT */
 	"arithmetic trap",			/*  2 T_ARITHTRAP */
@@ -119,17 +119,18 @@ const char *trap_type[] = {
 	"machine check",			/* 18 T_MCA */
 	"SSE FP exception",			/* 19 T_XMM */
 };
-int	trap_types = nitems(trap_type);
+const int	trap_types = nitems(trap_type);
 
 #ifdef DEBUG
 int	trapdebug = 0;
 #endif
 
-#define	IDTVEC(name)	__CONCAT(X, name)
+static inline void frame_dump(struct trapframe *_tf, struct proc *_p,
+    const char *_sig, uint64_t _cr2);
+static inline void verify_smap(const char *_func);
+static inline void debug_trap(struct trapframe *_frame, struct proc *_p,
+    long _type);
 
-#ifdef TRAP_SIGDEBUG
-static void frame_dump(struct trapframe *);
-#endif
 
 /*
  * trap(frame):
@@ -144,38 +145,17 @@ trap(struct trapframe *frame)
 	struct proc *p = curproc;
 	int type = (int)frame->tf_trapno;
 	struct pcb *pcb;
-	extern char doreti_iret[], resume_iret[];
-	extern char xrstor_fault[], xrstor_resume[];
 	caddr_t onfault;
 	int error;
 	uint64_t cr2;
 	union sigval sv;
 
+	verify_smap(__func__);
 	uvmexp.traps++;
+	debug_trap(frame, p, type);
 
 	pcb = (p != NULL && p->p_addr != NULL) ? &p->p_addr->u_pcb : NULL;
 
-#ifdef DEBUG
-	if (trapdebug) {
-		printf("trap %d code %llx rip %llx cs %llx rflags %llx "
-		       "cr2 %llx cpl %x\n",
-		    type, frame->tf_err, frame->tf_rip, frame->tf_cs,
-		    frame->tf_rflags, rcr2(), curcpu()->ci_ilevel);
-		printf("curproc %p\n", (void *)p);
-		if (p != NULL)
-			printf("pid %d\n", p->p_p->ps_pid);
-	}
-#endif
-#ifdef DIAGNOSTIC
-	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
-		u_long rf = read_rflags();
-		if (rf & PSL_AC) {
-			write_rflags(rf & ~PSL_AC);
-			panic("%s: AC set on entry", "trap");
-		}
-	}
-#endif
-
 	if (!KERNELMODE(frame->tf_cs, frame->tf_rflags)) {
 		type |= T_USER;
 		p->p_md.md_regs = frame;
@@ -205,27 +185,6 @@ trap(struct trapframe *frame)
 		/*NOTREACHED*/
 
 	case T_PROTFLT:
-		/*
-		 * Check for xrstor faulting because of invalid xstate
-		 * We do this by looking at the address of the
-		 * instruction that faulted.
-		 */
-		if (frame->tf_rip == (u_int64_t)xrstor_fault && p != NULL) {
-			frame->tf_rip = (u_int64_t)xrstor_resume;
-			return;
-		}
-
-		/*
-		 * Check for failure during return to user mode.
-		 * We do this by looking at the address of the
-		 * instruction that faulted.
-		 */
-		if (frame->tf_rip == (u_int64_t)doreti_iret) {
-			frame->tf_rip = (u_int64_t)resume_iret;
-			return;
-		}
-		/* FALLTHROUGH */
-
 	case T_SEGNPFLT:
 	case T_ALIGNFLT:
 	case T_TSSFLT:
@@ -243,12 +202,7 @@ copyfault:
 	case T_TSSFLT|T_USER:
 	case T_SEGNPFLT|T_USER:
 	case T_STKFLT|T_USER:
-#ifdef TRAP_SIGDEBUG
-		printf("pid %d (%s): %s at rip %llx addr %llx\n",
-		    p->p_p->ps_pid, p->p_p->ps_comm, "BUS",
-		    frame->tf_rip, rcr2());
-		frame_dump(frame);
-#endif
+		frame_dump(frame, p, "BUS", 0);
 		sv.sival_ptr = (void *)frame->tf_rip;
 		KERNEL_LOCK();
 		trapsignal(p, SIGBUS, type & ~T_USER, BUS_OBJERR, sv);
@@ -267,30 +221,11 @@ copyfault:
 		trapsignal(p, SIGILL, type & ~T_USER, ILL_PRVOPC, sv);
 		KERNEL_UNLOCK();
 		goto out;
-	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
-#ifdef TRAP_SIGDEBUG
-		printf("pid %d (%s): %s at rip %llx addr %llx\n",
-		    p->p_p->ps_pid, p->p_p->ps_comm, "ILL",
-		    frame->tf_rip, rcr2());
-		frame_dump(frame);
-#endif
-		sv.sival_ptr = (void *)frame->tf_rip;
-		KERNEL_LOCK();
-		trapsignal(p, SIGILL, type & ~T_USER, ILL_COPROC, sv);
-		KERNEL_UNLOCK();
-		goto out;
+	case T_FPOPFLT|T_USER:		/* impossible without 32bit compat */
 	case T_BOUND|T_USER:
-		sv.sival_ptr = (void *)frame->tf_rip;
-		KERNEL_LOCK();
-		trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
-		KERNEL_UNLOCK();
-		goto out;
 	case T_OFLOW|T_USER:
-		sv.sival_ptr = (void *)frame->tf_rip;
-		KERNEL_LOCK();
-		trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
-		KERNEL_UNLOCK();
-		goto out;
+	case T_DNA|T_USER:
+		panic("impossible trap");
 	case T_DIVIDE|T_USER:
 		sv.sival_ptr = (void *)frame->tf_rip;
 		KERNEL_LOCK();
@@ -401,18 +336,13 @@ faultcommon:
 			    p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
 			signal = SIGKILL;
 		} else {
-#ifdef TRAP_SIGDEBUG
-			printf("pid %d (%s): %s at rip %llx addr %llx\n",
-			    p->p_p->ps_pid, p->p_p->ps_comm, "SEGV",
-			    frame->tf_rip, rcr2());
-			frame_dump(frame);
-#endif
-		}
-		if (error == EACCES)
-			sicode = SEGV_ACCERR;
-		if (error == EIO) {
-			signal = SIGBUS;
-			sicode = BUS_OBJERR;
+			frame_dump(frame, p, "SEGV", cr2);
+			if (error == EACCES)
+				sicode = SEGV_ACCERR;
+			else if (error == EIO) {
+				signal = SIGBUS;
+				sicode = BUS_OBJERR;
+			}
 		}
 		sv.sival_ptr = (void *)fa;
 		trapsignal(p, signal, T_PAGEFLT, sicode, sv);
@@ -455,10 +385,12 @@ out:
 	userret(p);
 }
 
-#ifdef TRAP_SIGDEBUG
-static void
-frame_dump(struct trapframe *tf)
+static inline void
+frame_dump(struct trapframe *tf, struct proc *p, const char *sig, uint64_t cr2)
 {
+#ifdef TRAP_SIGDEBUG
+	printf("pid %d (%s): %s at rip %llx addr %llx\n",
+	    p->p_p->ps_pid, p->p_p->ps_comm, sig, tf->tf_rip, cr2);
 	printf("rip %p  cs 0x%x  rfl %p  rsp %p  ss 0x%x\n",
 	    (void *)tf->tf_rip, (unsigned)tf->tf_cs & 0xffff,
 	    (void *)tf->tf_rflags,
@@ -475,8 +407,38 @@ frame_dump(struct trapframe *tf)
 	    (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15);
 	printf("rbp %p  rbx %p  rax %p\n",
 	    (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax);
+#endif
 }
+
+static inline void
+verify_smap(const char *func)
+{
+#ifdef DIAGNOSTIC
+	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
+		u_long rf = read_rflags();
+		if (rf & PSL_AC) {
+			write_rflags(rf & ~PSL_AC);
+			panic("%s: AC set on entry", func);
+		}
+	}
 #endif
+}
+
+static inline void
+debug_trap(struct trapframe *frame, struct proc *p, long type)
+{
+#ifdef DEBUG
+	if (trapdebug) {
+		printf("trap %ld code %llx rip %llx cs %llx rflags %llx "
+		       "cr2 %llx cpl %x\n",
+		    type, frame->tf_err, frame->tf_rip, frame->tf_cs,
+		    frame->tf_rflags, rcr2(), curcpu()->ci_ilevel);
+		printf("curproc %p\n", (void *)p);
+		if (p != NULL)
+			printf("pid %d\n", p->p_p->ps_pid);
+	}
+#endif
+}
 
 
 /*
@@ -514,16 +476,7 @@ syscall(struct trapframe *frame)
 	size_t argsize, argoff;
 	register_t code, args[9], rval[2], *argp;
 
-#ifdef DIAGNOSTIC
-	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
-		u_long rf = read_rflags();
-		if (rf & PSL_AC) {
-			write_rflags(rf & ~PSL_AC);
-			panic("%s: AC set on entry", "syscall");
-		}
-	}
-#endif
-
+	verify_smap(__func__);
 	uvmexp.syscalls++;
 	p = curproc;
 
Index: sys/arch/amd64/amd64/vector.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.51.2.2
diff -u -p -r1.51.2.2 vector.S
--- sys/arch/amd64/amd64/vector.S	28 Feb 2018 17:01:34 -0000	1.51.2.2
+++ sys/arch/amd64/amd64/vector.S	21 Jun 2018 11:54:01 -0000
@@ -179,17 +179,7 @@ IDTVEC(trap05)
 IDTVEC(trap06)
 	ZTRAP(T_PRIVINFLT)
 IDTVEC(trap07)
-	pushq	$0			# dummy error code
-	pushq	$T_DNA
-	INTRENTRY(trap07)
-	sti
-	cld
-	SMAP_CLAC
-	movq	CPUVAR(SELF),%rdi
-	movq	%rsp, %rsi
-	call	_C_LABEL(fpudna)
-	cli
-	INTRFASTEXIT
+	ZTRAP(T_DNA)		# impossible: we don't do lazy FPU
 IDTVEC(trap08)
 	pushq	$T_DOUBLEFLT
 	jmp	calltrap_specstk
@@ -202,59 +192,47 @@ IDTVEC(trap0b)
 IDTVEC(trap0c)
 	TRAP(T_STKFLT)
 
-	/*
-	 * If iretq faults, we'll get a trap at doreti_iret with CPL==0 but
-	 * the user's GS.base, which INTRENTRY wouldn't handle correctly
-	 * (it would skip the swapgs), so locally expand both it and
-	 * INTR_SAVE_GPRS, but add an extra test comparing %rip to doreti_iret
-	 * so that we can do the necessary swapgs in that case.
-	 */
+/*
+ * The #GP (general protection fault) handler has a couple weird cases
+ * to handle:
+ *  - trapping in iretq to userspace and
+ *  - trapping in xrstor in the kernel.
+ * We detect both of these by examining the %rip in the iretq_frame.
+ * Handling them is done by updating %rip in the iretq_frame to point
+ * to a stub handler of some sort and then iretq'ing to it.  For the
+ * iretq fault we resume in a stub which acts like we got a fresh #GP.
+ * For the xrstor fault we resume to a stub which returns an error to
+ * the routine that requested the xrstor.
+ */
 IDTVEC(trap0d)
+	pushq	%rdx
 	pushq	%rcx
-	leaq	_C_LABEL(doreti_iret)(%rip),%rcx
-	cmpq	%rcx,16(%rsp)		/* over %rcx and err to %rip */
+	movq	24(%rsp),%rdx		/* over %r[cd]x and err to %rip */
+	leaq	doreti_iret(%rip),%rcx
+	cmpq	%rcx,%rdx
+	je	.Lhandle_doreti
+	leaq	xrstor_fault(%rip),%rcx
+	cmpq	%rcx,%rdx
+	je	.Lhandle_xrstor
 	popq	%rcx
-	je	1f
-	testq	$SEL_RPL,16(%rsp)	/* over err and %rip to %cs */
-	je	INTRENTRY_LABEL(trap0d)
-1:	swapgs
-	movq	%rax,CPUVAR(SCRATCH)
-	movq	CPUVAR(KERN_CR3),%rax
-	testq	%rax,%rax
-	jz	98f
-	movq	%rax,%cr3
-	jmp	98f
-	.text
-	.globl	INTRENTRY_LABEL(trap0d)
-INTRENTRY_LABEL(trap0d):	/* from kernel */
-	pushq	$T_PROTFLT
-	subq	$152,%rsp
-	movq	%rcx,TF_RCX(%rsp)
-	jmp	99f
-98:	/* from userspace */
-	movq	CPUVAR(KERN_RSP),%rax
-	xchgq	%rax,%rsp
-	movq	%rcx,TF_RCX(%rsp)
-	/* set trapno in the trap frame */
-	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
-	/* copy err and iretq frame to the trap frame */
-	movq	0(%rax),%rcx
-	movq	%rcx,TF_ERR(%rsp)
-	add	$8,%rax
-	movq	IRETQ_RIP(%rax),%rcx
-	movq	%rcx,TF_RIP(%rsp)
-	movq	IRETQ_CS(%rax),%rcx
-	movq	%rcx,TF_CS(%rsp)
-	movq	IRETQ_RFLAGS(%rax),%rcx
-	movq	%rcx,TF_RFLAGS(%rsp)
-	movq	IRETQ_RSP(%rax),%rcx
-	movq	%rcx,TF_RSP(%rsp)
-	movq	IRETQ_SS(%rax),%rcx
-	movq	%rcx,TF_SS(%rsp)
-	movq	CPUVAR(SCRATCH),%rax
-99:	INTR_SAVE_MOST_GPRS_NO_ADJ
-	sti
-	jmp	calltrap
+	popq	%rdx
+	TRAP(T_PROTFLT)
+
+.Lhandle_xrstor:
+	/* xrstor faulted; just resume in xrstor_resume */
+	leaq	xrstor_resume(%rip),%rcx
+	jmp	1f
+
+.Lhandle_doreti:
+	/* iretq faulted; resume in a stub that acts like we got a #GP */
+	leaq	.Lhandle_doreti_resume(%rip),%rcx
+1:	movq	%rcx,24(%rsp)		/* over %r[cd]x and err to %rip */
+	popq	%rcx
+	popq	%rdx
+	addq	$8,%rsp			/* pop the err code */
+	jmp	doreti_iret
+.Lhandle_doreti_resume:
+	ZTRAP(T_PROTFLT)
 
 IDTVEC(trap0e)
 	TRAP(T_PAGEFLT)
@@ -305,55 +283,12 @@ Xexceptions:
 	.quad	_C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f)
 
 /*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %rip to point to this label.  At that point, we'll be running with
- * the kernel GS.base, but the trap frame will be from CPL==3, so we can't
- * go through INTRENTRY as it would do the swapgs that we don't want/need.
- * So, locally expand INTRENTRY but without the swapgs: manually
- * clean up the stack and resume as if we were handling a general
- * protection fault.  This will cause the process to get a SIGBUS.
- */
-NENTRY(resume_iret)
-	movq	%rax,CPUVAR(SCRATCH)
-	movq	CPUVAR(KERN_CR3),%rax
-	testq	%rax,%rax
-	jz	INTRENTRY_LABEL(iret)
-	movq	%rax,%cr3
-	jmp	INTRENTRY_LABEL(iret)
-	.text
-	.globl	INTRENTRY_LABEL(iret)
-INTRENTRY_LABEL(iret):	/* from kernel */
-	movq	CPUVAR(KERN_RSP),%rax
-	xchgq	%rax,%rsp
-	movq	%rcx,TF_RCX(%rsp)
-	/* set trapno+err in the trap frame */
-	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
-	movq	$0,TF_ERR(%rsp)
-	/* copy iretq frame to the trap frame */
-	movq	IRETQ_RIP(%rax),%rcx
-	movq	%rcx,TF_RIP(%rsp)
-	movq	IRETQ_CS(%rax),%rcx
-	movq	%rcx,TF_CS(%rsp)
-	movq	IRETQ_RFLAGS(%rax),%rcx
-	movq	%rcx,TF_RFLAGS(%rsp)
-	movq	IRETQ_RSP(%rax),%rcx
-	movq	%rcx,TF_RSP(%rsp)
-	movq	IRETQ_SS(%rax),%rcx
-	movq	%rcx,TF_SS(%rsp)
-	movq	CPUVAR(SCRATCH),%rax
-	INTR_SAVE_MOST_GPRS_NO_ADJ
-	sti
-	jmp	calltrap
-
-
-/*
  * All traps go through here. Call the generic trap handler, and
  * check for ASTs afterwards.
  */
 KUENTRY(alltraps)
 	INTRENTRY(alltraps)
 	sti
-calltrap:
 	cld
 	SMAP_CLAC
 #ifdef DIAGNOSTIC
@@ -376,19 +311,14 @@ calltrap:
 	jz	2f
 .Lreal_trap:
 #endif /* !defined(GPROF) && defined(DDBPROF) */
+	.globl	recall_trap
+recall_trap:
 	movq	%rsp, %rdi
 	call	_C_LABEL(trap)
 2:	/* Check for ASTs on exit to user mode. */
 	cli
-	CHECK_ASTPENDING(%r11)
-	je	1f
 	testb	$SEL_RPL,TF_CS(%rsp)
-	jz	1f
-5:	CLEAR_ASTPENDING(%r11)
-	sti
-	movq	%rsp, %rdi
-	call	_C_LABEL(ast)
-	jmp	2b
+	jnz	intr_user_exit
 #ifndef DIAGNOSTIC
 1:	INTRFASTEXIT
 #else /* DIAGNOSTIC */
@@ -396,7 +326,7 @@ calltrap:
 	jne	3f
 	INTRFASTEXIT
 3:	sti
-	movabsq	$spl_lowered,%rdi
+	leaq	spl_lowered(%rip),%rdi
 	movl	CPUVAR(ILEVEL),%esi
 	movl	%ebx,%edx
 	xorq	%rax,%rax
@@ -601,7 +531,6 @@ KIDTVEC(resume_xen_upcall)
 2:
 	movq	$(1 << LIR_XEN),%rax
 	orq	%rax,CPUVAR(IPENDING)
-3:
 	INTRFASTEXIT
 #endif /* NXEN > 0 */
 
@@ -636,7 +565,6 @@ KIDTVEC(resume_hyperv_upcall)
 2:
 	movq	$(1 << LIR_HYPERV),%rax
 	orq	%rax,CPUVAR(IPENDING)
-3:
 	INTRFASTEXIT
 #endif /* NHYPERV > 0 */
 #endif /* NLAPIC > 0 */
@@ -682,7 +610,7 @@ IDTVEC(intr_##name##num)						;\
 	SMAP_CLAC							;\
 	incl	CPUVAR(IDEPTH)						;\
 	movq	IS_HANDLERS(%r14),%rbx					;\
-6:									\
+6:	/* loop, walking chain of handlers */				\
 	movl	IH_LEVEL(%rbx),%r12d					;\
 	cmpl	%r13d,%r12d						;\
 	jle	7f							;\
@@ -693,6 +621,8 @@ IDTVEC(intr_##name##num)						;\
 	orl	%eax,%eax		/* should it be counted? */	;\
 	jz	4f			/* no, skip it */		;\
 	incq	IH_COUNT(%rbx)		/* count the intrs */		;\
+	cmpl	$2,%eax			/* can't know if it was ours */ ;\
+	je	4f			/* keep trying */		;\
 	cmpl	$0,_C_LABEL(intr_shared_edge)				;\
 	jne	4f			/* if no shared edges ... */	;\
 	orl	%eax,%eax		/* 1 means stop trying */	;\
@@ -700,13 +630,13 @@ IDTVEC(intr_##name##num)						;\
 4:	movq	IH_NEXT(%rbx),%rbx	/* next handler in chain */	;\
 	testq	%rbx,%rbx						;\
 	jnz	6b							;\
-5:									\
+5:	/* successfully handled */					\
 	cli								;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
 	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
-7:									\
+7:	/* current IPL > handler's ih_level */				\
 	cli								;\
 	movq	$(1 << num),%rax					;\
 	orq     %rax,CPUVAR(IPENDING)					;\
@@ -714,16 +644,18 @@ IDTVEC(intr_##name##num)						;\
 	late_ack(num)							;\
 	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
-10:									\
+10:	/* currently masked */						\
 	cli								;\
 	movq	$(1 << num),%rax					;\
 	orq	%rax,CPUVAR(IPENDING)					;\
 	level_mask(num)							;\
 	late_ack(num)							;\
 	INTRFASTEXIT							;\
-9:									\
+9:	/* spurious interrupt */					\
 	unmask(num)							;\
 	late_ack(num)							;\
+	testb	$SEL_RPL,TF_CS(%rsp)					;\
+	jnz	intr_user_exit						;\
 	INTRFASTEXIT
 
 #define ICUADDR IO_ICU1
Index: sys/arch/amd64/amd64/via.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/via.c,v
retrieving revision 1.23
diff -u -p -r1.23 via.c
--- sys/arch/amd64/amd64/via.c	2 May 2017 11:47:49 -0000	1.23
+++ sys/arch/amd64/amd64/via.c	21 Jun 2018 11:54:01 -0000
@@ -317,18 +317,11 @@ static __inline void
 viac3_cbc(void *cw, void *src, void *dst, void *key, int rep,
     void *iv)
 {
-	unsigned int creg0;
-
-	creg0 = rcr0();		/* Permit access to SIMD/FPU path */
-	lcr0(creg0 & ~(CR0_EM|CR0_TS));
-
 	/* Do the deed */
 	__asm volatile("pushfq; popfq");
 	__asm volatile("rep xcryptcbc" :
 	    : "b" (key), "a" (iv), "c" (rep), "d" (cw), "S" (src), "D" (dst)
 	    : "memory", "cc");
-
-	lcr0(creg0);
 }
 
 int
@@ -521,14 +514,8 @@ void
 viac3_rnd(void *v)
 {
 	struct timeout *tmo = v;
-	unsigned int *p, i, rv, creg0, len = VIAC3_RNG_BUFSIZ;
+	unsigned int *p, i, rv, len = VIAC3_RNG_BUFSIZ;
 	static int buffer[VIAC3_RNG_BUFSIZ + 2];	/* XXX why + 2? */
-#ifdef MULTIPROCESSOR
-	int s = splipi();
-#endif
-
-	creg0 = rcr0();		/* Permit access to SIMD/FPU path */
-	lcr0(creg0 & ~(CR0_EM|CR0_TS));
 
 	/*
 	 * Here we collect the random data from the VIA C3 RNG.  We make
@@ -538,12 +525,6 @@ viac3_rnd(void *v)
 	__asm volatile("rep xstorerng"
 	    : "=a" (rv) : "d" (3), "D" (buffer), "c" (len*sizeof(int))
 	    : "memory", "cc");
-
-	lcr0(creg0);
-
-#ifdef MULTIPROCESSOR
-	splx(s);
-#endif
 
 	for (i = 0, p = buffer; i < VIAC3_RNG_BUFSIZ; i++, p++)
 		add_true_randomness(*p);
Index: sys/arch/amd64/amd64/vm_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vm_machdep.c,v
retrieving revision 1.40
diff -u -p -r1.40 vm_machdep.c
--- sys/arch/amd64/amd64/vm_machdep.c	12 Sep 2017 02:58:08 -0000	1.40
+++ sys/arch/amd64/amd64/vm_machdep.c	21 Jun 2018 11:54:01 -0000
@@ -73,19 +73,12 @@ cpu_fork(struct proc *p1, struct proc *p
     void (*func)(void *), void *arg)
 {
 	struct pcb *pcb = &p2->p_addr->u_pcb;
+	struct pcb *pcb1 = &p1->p_addr->u_pcb;
 	struct trapframe *tf;
 	struct switchframe *sf;
 
-	/*
-	 * If fpuproc != p1, then the fpu h/w state is irrelevant and the
-	 * state had better already be in the pcb.  This is true for forks
-	 * but not for dumps.
-	 *
-	 * If fpuproc == p1, then we have to save the fpu h/w state to
-	 * p1's pcb so that we can copy it.
-	 */
-	if (p1->p_addr->u_pcb.pcb_fpcpu != NULL)
-		fpusave_proc(p1, 1);
+	/* Save the fpu h/w state to p1's pcb so that we can copy it. */
+	fpusave(&pcb1->pcb_savefpu);
 
 	p2->p_md.md_flags = p1->p_md.md_flags;
 
@@ -93,7 +86,7 @@ cpu_fork(struct proc *p1, struct proc *p
 	if (p1 != curproc && p1 != &proc0)
 		panic("cpu_fork: curproc");
 #endif
-	*pcb = p1->p_addr->u_pcb;
+	*pcb = *pcb1;
 
 	/*
 	 * Activate the address space.
@@ -137,11 +130,6 @@ cpu_fork(struct proc *p1, struct proc *p
 void
 cpu_exit(struct proc *p)
 {
-
-	/* If we were using the FPU, forget about it. */
-	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-		fpusave_proc(p, 0);
-
 	pmap_deactivate(p);
 	sched_exit(p);
 }
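
Under the eager model curproc's extended state is always live in the CPU
registers, so cpu_fork() can snapshot it with a plain fpusave() into p1's
pcb and cpu_exit() has no FPU bookkeeping left to do.  For reference, a
sketch of what fpusave() amounts to, built from the xsave()/fxsave()
wrappers in machine/fpu.h (the real routine lives in fpu.c):

	void
	fpusave(struct savefpu *sfp)
	{
		if (xsave_mask)
			xsave(sfp, xsave_mask);
		else
			fxsave(sfp);
	}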
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.170
diff -u -p -r1.170 vmm.c
--- sys/arch/amd64/amd64/vmm.c	8 Sep 2017 05:36:51 -0000	1.170
+++ sys/arch/amd64/amd64/vmm.c	21 Jun 2018 11:54:01 -0000
@@ -3584,39 +3584,67 @@ vcpu_must_stop(struct vcpu *vcpu)
 }
 
 /*
- * vmm_fpusave
+ * vmm_fpurestore
  *
- * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
- * and does not call splipi/splx. Must be called with interrupts disabled.
+ * Restore the guest's FPU state, saving the existing userland thread's
+ * FPU context if necessary.  Must be called with interrupts disabled.
  */
-void
-vmm_fpusave(void)
+int
+vmm_fpurestore(struct vcpu *vcpu)
 {
-	struct proc *p;
 	struct cpu_info *ci = curcpu();
 
-	p = ci->ci_fpcurproc;
-	if (p == NULL)
-		return;
+	/* save vmmd's FPU state if we haven't already */
+	if (ci->ci_flags & CPUF_USERXSTATE) {
+		ci->ci_flags &= ~CPUF_USERXSTATE;
+		fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
+	}
 
-	if (ci->ci_fpsaving != 0)
-		panic("%s: recursive save!", __func__);
-	/*
-	 * Set ci->ci_fpsaving, so that any pending exception will be
-	 * thrown away.  (It will be caught again if/when the FPU
-	 * state is restored.)
-	 */
-	ci->ci_fpsaving = 1;
-	if (xsave_mask)
-		xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
-	else
-		fxsave(&p->p_addr->u_pcb.pcb_savefpu);
-	ci->ci_fpsaving = 0;
+	if (vcpu->vc_fpuinited) {
+		/* Restore guest XCR0 and FPU context */
+		if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+			DPRINTF("%s: guest attempted to set invalid %s\n",
+			    __func__, "bits in xcr0");
+			return EINVAL;
+		}
 
-	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+		if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) {
+			DPRINTF("%s: guest attempted to set invalid %s\n",
+			    __func__, "xsave/xrstor state");
+			return EINVAL;
+		}
+	}
+
+	if (xsave_mask) {
+		/* Restore guest %xcr0 */
+		xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
+	}
 
-	p->p_addr->u_pcb.pcb_fpcpu = NULL;
-	ci->ci_fpcurproc = NULL;
+	return 0;
+}
+
+/*
+ * vmm_fpusave
+ *
+ * Save the guest's FPU state.  Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(struct vcpu *vcpu)
+{
+	if (xsave_mask) {
+		/* Save guest %xcr0 */
+		vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+		/* Restore host %xcr0 */
+		xsetbv(0, xsave_mask);
+	}
+
+	/*
+	 * Save full copy of FPU state - guest content is always
+	 * a subset of host's save area (see xsetbv exit handler)
+	 */
+	fpusavereset(&vcpu->vc_g_fpu);
+	vcpu->vc_fpuinited = 1;
 }
 
 /*
@@ -3839,39 +3867,10 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 
 		/* Disable interrupts and save the current FPU state. */
 		disable_intr();
-		clts();
-		vmm_fpusave();
-
-		/* Initialize the guest FPU if not inited already */
-		if (!vcpu->vc_fpuinited) {
-			fninit();
-			bzero(&vcpu->vc_g_fpu.fp_fxsave,
-			    sizeof(vcpu->vc_g_fpu.fp_fxsave));
-			vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
-			    __INITIAL_NPXCW__;
-			vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
-			    __INITIAL_MXCSR__;
-			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
-			vcpu->vc_fpuinited = 1;
-		}
-
-		if (xsave_mask) {
-			/* Restore guest XCR0 and FPU context */
-			if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
-				DPRINTF("%s: guest attempted to set invalid "
-				    "bits in xcr0\n", __func__);
-				ret = EINVAL;
-				stts();
-				enable_intr();
-				break;
-			}
-
-			/* Restore guest %xcr0 */
-			xrstor(&vcpu->vc_g_fpu, xsave_mask);
-			xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
-		} else
-			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+		if ((ret = vmm_fpurestore(vcpu))) {
+			enable_intr();
+			break;
+		}
 
 		KERNEL_UNLOCK();
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
@@ -3882,27 +3881,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 		 * the guest FPU state still possibly on the CPU. Save the FPU
 		 * state before re-enabling interrupts.
 		 */
-		if (xsave_mask) {
-			/* Save guest %xcr0 */
-			vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
-			/* Restore host %xcr0 */
-			xsetbv(0, xsave_mask);
-
-			/*
-			 * Save full copy of FPU state - guest content is
-			 * always a subset of host's save area (see xsetbv
-			 * exit handler)
-			 */	
-			xsave(&vcpu->vc_g_fpu, xsave_mask);
-		} else
-			fxsave(&vcpu->vc_g_fpu);
-
-		/*
-		 * FPU state is invalid, set CR0_TS to force DNA trap on next
-		 * access.
-		 */
-		stts();
+		vmm_fpusave(vcpu);
 
 		enable_intr();
 
@@ -5715,39 +5694,10 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 
 		/* Disable interrupts and save the current FPU state. */
 		disable_intr();
-		clts();
-		vmm_fpusave();
-
-		/* Initialize the guest FPU if not inited already */
-		if (!vcpu->vc_fpuinited) {
-			fninit();
-			bzero(&vcpu->vc_g_fpu.fp_fxsave,
-			    sizeof(vcpu->vc_g_fpu.fp_fxsave));
-			vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
-			    __INITIAL_NPXCW__;
-			vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
-			    __INITIAL_MXCSR__;
-			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
-			vcpu->vc_fpuinited = 1;
-		}
-
-		if (xsave_mask) {
-			/* Restore guest XCR0 and FPU context */
-			if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
-				DPRINTF("%s: guest attempted to set invalid "
-				    "bits in xcr0\n", __func__);
-				ret = EINVAL;
-				stts();
-				enable_intr();
-				break;
-			}
-
-			/* Restore guest %xcr0 */
-			xrstor(&vcpu->vc_g_fpu, xsave_mask);
-			xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
-		} else
-			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+		if ((ret = vmm_fpurestore(vcpu))) {
+			enable_intr();
+			break;
+		}
 
 		KERNEL_UNLOCK();
 
@@ -5761,27 +5711,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		 * the guest FPU state still possibly on the CPU. Save the FPU
 		 * state before re-enabling interrupts.
 		 */
-		if (xsave_mask) {
-			/* Save guest %xcr0 */
-			vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
-			/* Restore host %xcr0 */
-			xsetbv(0, xsave_mask);
-
-			/*
-			 * Save full copy of FPU state - guest content is
-			 * always a subset of host's save area (see xsetbv
-			 * exit handler)
-			 */	
-			xsave(&vcpu->vc_g_fpu, xsave_mask);
-		} else
-			fxsave(&vcpu->vc_g_fpu);
-
-		/*
-		 * FPU state is invalid, set CR0_TS to force DNA trap on next
-		 * access.
-		 */
-		stts();
+		vmm_fpusave(vcpu);
 
 		enable_intr();
 
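The xgetbv()/xsetbv() calls above are the usual wrappers for the
XGETBV/XSETBV instructions: %ecx selects the extended control register
and %edx:%eax carry the 64-bit value.  A minimal sketch of the semantics
(the kernel's own inlines live elsewhere):

	static inline uint64_t
	xgetbv(uint32_t reg)
	{
		uint32_t lo, hi;

		__asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg));
		return ((uint64_t)hi << 32) | lo;
	}

	static inline void
	xsetbv(uint32_t reg, uint64_t val)
	{
		uint32_t lo = val, hi = val >> 32;

		__asm volatile("xsetbv" : : "c" (reg), "a" (lo), "d" (hi));
	}
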
Index: sys/arch/amd64/include/codepatch.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/codepatch.h,v
retrieving revision 1.4
diff -u -p -r1.4 codepatch.h
--- sys/arch/amd64/include/codepatch.h	25 Aug 2017 19:28:48 -0000	1.4
+++ sys/arch/amd64/include/codepatch.h	21 Jun 2018 11:54:01 -0000
@@ -50,6 +50,8 @@ void codepatch_call(uint16_t tag, void *
 #define CPTAG_STAC		1
 #define CPTAG_CLAC		2
 #define CPTAG_EOI		3
+#define CPTAG_XRSTOR		4
+#define CPTAG_XSAVE		5
 
 /*
  * As stac/clac SMAP instructions are 3 bytes, we want the fastest
Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.114.4.1
diff -u -p -r1.114.4.1 cpu.h
--- sys/arch/amd64/include/cpu.h	26 Feb 2018 12:29:48 -0000	1.114.4.1
+++ sys/arch/amd64/include/cpu.h	21 Jun 2018 11:54:01 -0000
@@ -115,10 +115,6 @@ struct cpu_info {
 	u_int64_t ci_intr_rsp;	/* U<-->K trampoline stack */
 	u_int64_t ci_user_cr3;	/* U-K page table */
 
-	struct proc *ci_fpcurproc;
-	struct proc *ci_fpsaveproc;
-	int ci_fpsaving;
-
 	struct pcb *ci_curpcb;
 	struct pcb *ci_idle_pcb;
 
@@ -216,9 +212,9 @@ struct cpu_info {
 #define CPUF_IDENTIFIED	0x0020		/* CPU has been identified */
 
 #define CPUF_CONST_TSC	0x0040		/* CPU has constant TSC */
-#define CPUF_USERSEGS_BIT	7	/* CPU has curproc's segments */
-#define CPUF_USERSEGS	(1<<CPUF_USERSEGS_BIT)		/* and FS.base */
+#define CPUF_USERSEGS	0x0080		/* CPU has curproc's segs and FS.base */
 #define CPUF_INVAR_TSC	0x0100		/* CPU has invariant TSC */
+#define CPUF_USERXSTATE	0x0200		/* CPU has curproc's xsave state */
 
 #define CPUF_PRESENT	0x1000		/* CPU is present */
 #define CPUF_RUNNING	0x2000		/* CPU is running */
@@ -268,7 +264,6 @@ extern void need_resched(struct cpu_info
 extern struct cpu_info *cpu_info[MAXCPUS];
 
 void cpu_boot_secondary_processors(void);
-void cpu_init_idle_pcbs(void);    
 
 void cpu_kick(struct cpu_info *);
 void cpu_unidle(struct cpu_info *);
@@ -371,7 +366,6 @@ void	dumpconf(void);
 void	cpu_reset(void);
 void	x86_64_proc0_tss_ldt_init(void);
 void	x86_64_bufinit(void);
-void	x86_64_init_pcb_tss_ldt(struct cpu_info *);
 void	cpu_proc_fork(struct proc *, struct proc *);
 int	amd64_pa_used(paddr_t);
 extern void (*cpu_idle_enter_fcn)(void);
Index: sys/arch/amd64/include/fpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
retrieving revision 1.12
diff -u -p -r1.12 fpu.h
--- sys/arch/amd64/include/fpu.h	27 Apr 2017 06:16:39 -0000	1.12
+++ sys/arch/amd64/include/fpu.h	21 Jun 2018 11:54:01 -0000
@@ -7,10 +7,11 @@
 #include <sys/types.h>
 
 /*
- * amd64 only uses the extended save/restore format used
- * by fxsave/fsrestore, to always deal with the SSE registers,
- * which are part of the ABI to pass floating point values.
- * Must be stored in memory on a 16-byte boundary.
+ * If the CPU supports xsave/xrstor then we use them so that we can provide
+ * AVX support.  Otherwise we require fxsave/fxrstor, as the SSE registers
+ * are part of the ABI for passing floating point values.
+ * While fxsave/fxrstor require only 16-byte alignment for the save area,
+ * xsave/xrstor require the save area to have 64-byte alignment.
  */
 
 struct fxsave64 {
@@ -63,23 +64,22 @@ extern uint32_t	fpu_mxcsr_mask;
 extern uint64_t	xsave_mask;
 
 void fpuinit(struct cpu_info *);
-void fpudrop(void);
-void fpudiscard(struct proc *);
 void fputrap(struct trapframe *);
-void fpusave_proc(struct proc *, int);
-void fpusave_cpu(struct cpu_info *, int);
+void fpusave(struct savefpu *);
+void fpusavereset(struct savefpu *);
 void fpu_kernel_enter(void);
 void fpu_kernel_exit(void);
 
+int	xrstor_user(struct savefpu *_addr, uint64_t _mask);
+#define	fpureset() \
+	xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask)
+
 #define fninit()		__asm("fninit")
 #define fwait()			__asm("fwait")
-#define fnclex()		__asm("fnclex")
+/* should be fxsave64, but where we use this it doesn't matter */
 #define fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
-#define fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
 #define ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))
 #define fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
-#define clts()			__asm("clts")
-#define stts()			lcr0(rcr0() | CR0_TS)
 
 static inline void
 xsave(struct savefpu *addr, uint64_t mask)
@@ -88,18 +88,9 @@ xsave(struct savefpu *addr, uint64_t mas
 
 	lo = mask;
 	hi = mask >> 32;
+	/* should be xsave64, but where we use this it doesn't matter */
 	__asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
 	    "memory");
-}
-
-static inline void
-xrstor(struct savefpu *addr, uint64_t mask)
-{
-	uint32_t lo, hi;
-
-	lo = mask;
-	hi = mask >> 32;
-	__asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
 }
 
 #endif
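
As the reworked comment notes, xsave/xrstor demand a 64-byte aligned save
area where fxsave/fxrstor were content with 16 bytes.  A minimal sketch
of declaring and filling such an area with the wrappers above (using the
__aligned attribute from sys/cdefs.h; xsave takes a #GP fault on a
misaligned area):

	struct savefpu fpu_area __aligned(64);

	xsave(&fpu_area, xsave_mask);	/* capture x87/SSE/AVX state */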
Index: sys/arch/amd64/include/intrdefs.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
retrieving revision 1.16
diff -u -p -r1.16 intrdefs.h
--- sys/arch/amd64/include/intrdefs.h	22 Jun 2016 01:12:38 -0000	1.16
+++ sys/arch/amd64/include/intrdefs.h	21 Jun 2018 11:54:01 -0000
@@ -75,8 +75,6 @@
 
 #define X86_IPI_HALT			0x00000001
 #define X86_IPI_NOP			0x00000002
-#define X86_IPI_FLUSH_FPU		0x00000004
-#define X86_IPI_SYNCH_FPU		0x00000008
 #define X86_IPI_TLB			0x00000010
 #define X86_IPI_MTRR			0x00000020
 #define X86_IPI_SETPERF			0x00000040
@@ -84,10 +82,10 @@
 #define X86_IPI_START_VMM		0x00000100
 #define X86_IPI_STOP_VMM		0x00000200
 
 #define X86_NIPI			10
 
-#define X86_IPI_NAMES { "halt IPI", "nop IPI", "FPU flush IPI", \
-			 "FPU synch IPI", "TLB shootdown IPI", \
+#define X86_IPI_NAMES { "halt IPI", "nop IPI", NULL, \
+			 NULL, "TLB shootdown IPI", \
 			 "MTRR update IPI", "setperf IPI", "ddb IPI", \
 			 "VMM start IPI", "VMM stop IPI" }
 
Index: sys/arch/amd64/include/pcb.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pcb.h,v
retrieving revision 1.16
diff -u -p -r1.16 pcb.h
--- sys/arch/amd64/include/pcb.h	26 Apr 2017 07:05:24 -0000	1.16
+++ sys/arch/amd64/include/pcb.h	21 Jun 2018 11:54:01 -0000
@@ -69,7 +69,6 @@
 
 #include <sys/signal.h>
 
-#include <machine/tss.h>
 #include <machine/fpu.h>
 
 /*
@@ -84,9 +83,7 @@ struct pcb {
 	u_int64_t	pcb_kstack;	/* kernel stack address */
 	u_int64_t	pcb_fsbase;	/* per-thread offset: %fs */
 	caddr_t	pcb_onfault;		/* copyin/out fault recovery */
-	struct	cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	struct	pmap *pcb_pmap;		/* back pointer to our pmap */
-	int	pcb_cr0;		/* saved image of CR0 */
 };
 
 #ifdef _KERNEL
Index: sys/arch/amd64/include/proc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/proc.h,v
retrieving revision 1.9
diff -u -p -r1.9 proc.h
--- sys/arch/amd64/include/proc.h	13 Apr 2017 03:52:25 -0000	1.9
+++ sys/arch/amd64/include/proc.h	21 Jun 2018 11:54:01 -0000
@@ -46,7 +46,6 @@ struct mdproc {
 };
 
 /* md_flags */
-#define	MDP_USEDFPU	0x0001	/* has used the FPU */
 #define MDP_IRET	0x0002	/* return via iret, not sysret */
 				/* (iret can restore r11 and rcx) */
 
Index: sys/arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.61.4.1
diff -u -p -r1.61.4.1 specialreg.h
--- sys/arch/amd64/include/specialreg.h	26 Feb 2018 12:29:48 -0000	1.61.4.1
+++ sys/arch/amd64/include/specialreg.h	21 Jun 2018 11:54:01 -0000
@@ -1386,3 +1386,15 @@
 #define PAT_WB          0x6UL
 #define PAT_UCMINUS     0x7UL
 
+/*
+ * XSAVE subfeatures (cpuid 0xd, leaf 1)
+ */
+#define XSAVE_XSAVEOPT		0x1UL
+#define XSAVE_XSAVEC		0x2UL
+#define XSAVE_XGETBV1		0x4UL
+#define XSAVE_XSAVES		0x8UL
+
+/*
+ * Default cr0 flags.
+ */
+#define CR0_DEFAULT	(CR0_PE|CR0_PG|CR0_NE|CR0_WP)
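
The XSAVE_* subfeature bits are reported in %eax of cpuid leaf 0xd,
sub-leaf 1.  A sketch of probing them, assuming the CPUID_LEAF() helper
from machine/cpufunc.h:

	uint32_t eax, ebx, ecx, edx;

	CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
	if (eax & XSAVE_XSAVEOPT) {
		/* xsaveopt available: unmodified components can be skipped */
	}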