diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index de10cb8a2c97b8cde91e730bcb1486442fc4f82e..5e95b257ee26208612b8f28229612e8ce6796660 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -615,6 +615,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		prev->gsindex = gsindex;
 	}
 
+	/* Must be after DS reload */
+	unlazy_fpu(prev_p);
+
 	/* 
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -622,10 +625,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	write_pda(oldrsp, next->userrsp); 
 	write_pda(pcurrent, next_p); 
 
-	/* This must be here to ensure both math_state_restore() and
-	   kernel_fpu_begin() work consistently. 
-	   And the AMD workaround requires it to be after DS reload. */
-	unlazy_fpu(prev_p);
 	write_pda(kernelstack,
 	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR