diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa509496205cf792980d1b6f57b3d87177..ebe7deedd5b42b27974b1b8884e1b68bdebb82cb 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 
@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..caba99601703de3fafefd5e84b00abbff71a8eaa
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ *
+ * The white paper of AES-NI instructions can be downloaded from:
+ *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
+ *
+ * Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *            Vinodh Gopal <vinodh.gopal@intel.com>
+ *            Kahraman Akdemir
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.text
+
+#define STATE1	%xmm0
+#define STATE2	%xmm4
+#define STATE3	%xmm5
+#define STATE4	%xmm6
+#define STATE	STATE1
+#define IN1	%xmm1
+#define IN2	%xmm7
+#define IN3	%xmm8
+#define IN4	%xmm9
+#define IN	IN1
+#define KEY	%xmm2
+#define IV	%xmm3
+
+#define KEYP	%rdi
+#define OUTP	%rsi
+#define INP	%rdx
+#define LEN	%rcx
+#define IVP	%r8
+#define KLEN	%r9d
+#define T1	%r10
+#define TKEYP	T1
+#define T2	%r11
+
+_key_expansion_128:
+_key_expansion_256a:
+	pshufd $0b11111111, %xmm1, %xmm1
+	shufps $0b00010000, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	shufps $0b10001100, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	pxor %xmm1, %xmm0
+	movaps %xmm0, (%rcx)
+	add $0x10, %rcx
+	ret
+
+_key_expansion_192a:
+	pshufd $0b01010101, %xmm1, %xmm1
+	shufps $0b00010000, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	shufps $0b10001100, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	pxor %xmm1, %xmm0
+
+	movaps %xmm2, %xmm5
+	movaps %xmm2, %xmm6
+	pslldq $4, %xmm5
+	pshufd $0b11111111, %xmm0, %xmm3
+	pxor %xmm3, %xmm2
+	pxor %xmm5, %xmm2
+
+	movaps %xmm0, %xmm1
+	shufps $0b01000100, %xmm0, %xmm6
+	movaps %xmm6, (%rcx)
+	shufps $0b01001110, %xmm2, %xmm1
+	movaps %xmm1, 16(%rcx)
+	add $0x20, %rcx
+	ret
+
+_key_expansion_192b:
+	pshufd $0b01010101, %xmm1, %xmm1
+	shufps $0b00010000, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	shufps $0b10001100, %xmm0, %xmm4
+	pxor %xmm4, %xmm0
+	pxor %xmm1, %xmm0
+
+	movaps %xmm2, %xmm5
+	pslldq $4, %xmm5
+	pshufd $0b11111111, %xmm0, %xmm3
+	pxor %xmm3, %xmm2
+	pxor %xmm5, %xmm2
+
+	movaps %xmm0, (%rcx)
+	add $0x10, %rcx
+	ret
+
+_key_expansion_256b:
+	pshufd $0b10101010, %xmm1, %xmm1
+	shufps $0b00010000, %xmm2, %xmm4
+	pxor %xmm4, %xmm2
+	shufps $0b10001100, %xmm2, %xmm4
+	pxor %xmm4, %xmm2
+	pxor %xmm1, %xmm2
+	movaps %xmm2, (%rcx)
+	add $0x10, %rcx
+	ret
+
+/*
+ * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ *                   unsigned int key_len)
+ */
+ENTRY(aesni_set_key)
+	movups (%rsi), %xmm0		# user key (first 16 bytes)
+	movaps %xmm0, (%rdi)
+	lea 0x10(%rdi), %rcx		# key addr
+	movl %edx, 480(%rdi)
+	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
+	cmp $24, %dl
+	jb .Lenc_key128
+	je .Lenc_key192
+	movups 0x10(%rsi), %xmm2	# other user key
+	movaps %xmm2, (%rcx)
+	add $0x10, %rcx
+	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	call _key_expansion_256a
+	# aeskeygenassist $0x1, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	call _key_expansion_256b
+	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	call _key_expansion_256a
+	# aeskeygenassist $0x2, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	call _key_expansion_256b
+	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	call _key_expansion_256a
+	# aeskeygenassist $0x4, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	call _key_expansion_256b
+	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	call _key_expansion_256a
+	# aeskeygenassist $0x8, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	call _key_expansion_256b
+	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	call _key_expansion_256a
+	# aeskeygenassist $0x10, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	call _key_expansion_256b
+	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	call _key_expansion_256a
+	# aeskeygenassist $0x20, %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	call _key_expansion_256b
+	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	call _key_expansion_256a
+	jmp .Ldec_key
+.Lenc_key192:
+	movq 0x10(%rsi), %xmm2		# other user key
+	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	call _key_expansion_192a
+	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	call _key_expansion_192b
+	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	call _key_expansion_192a
+	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	call _key_expansion_192b
+	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	call _key_expansion_192a
+	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	call _key_expansion_192b
+	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	call _key_expansion_192a
+	# aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+	call _key_expansion_192b
+	jmp .Ldec_key
+.Lenc_key128:
+	# aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	call _key_expansion_128
+	# aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	call _key_expansion_128
+	# aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	call _key_expansion_128
+	# aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	call _key_expansion_128
+	# aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	call _key_expansion_128
+	# aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	call _key_expansion_128
+	# aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+	call _key_expansion_128
+	# aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+	call _key_expansion_128
+	# aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+	call _key_expansion_128
+	# aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
+	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+	call _key_expansion_128
+.Ldec_key:
+	sub $0x10, %rcx
+	movaps (%rdi), %xmm0
+	movaps (%rcx), %xmm1
+	movaps %xmm0, 240(%rcx)
+	movaps %xmm1, 240(%rdi)
+	add $0x10, %rdi
+	lea 240-16(%rcx), %rsi
+.align 4
+.Ldec_key_loop:
+	movaps (%rdi), %xmm0
+	# aesimc %xmm0, %xmm1
+	.byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+	movaps %xmm1, (%rsi)
+	add $0x10, %rdi
+	sub $0x10, %rsi
+	cmp %rcx, %rdi
+	jb .Ldec_key_loop
+	xor %rax, %rax
+	ret
+
+/*
+ * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_enc)
+	movl 480(KEYP), KLEN		# key length
+	movups (INP), STATE		# input
+	call _aesni_enc1
+	movups STATE, (OUTP)		# output
+	ret
+
+/*
+ * _aesni_enc1:		internal ABI
+ * input:
+ *	KEYP:		key struct pointer
+ *	KLEN:		round count
+ *	STATE:		initial state (input)
+ * output:
+ *	STATE:		finial state (output)
+ * changed:
+ *	KEY
+ *	TKEYP (T1)
+ */
+_aesni_enc1:
+	movaps (KEYP), KEY		# key
+	mov KEYP, TKEYP
+	pxor KEY, STATE		# round 0
+	add $0x30, TKEYP
+	cmp $24, KLEN
+	jb .Lenc128
+	lea 0x20(TKEYP), TKEYP
+	je .Lenc192
+	add $0x20, TKEYP
+	movaps -0x60(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps -0x50(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc192:
+	movaps -0x40(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps -0x30(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc128:
+	movaps -0x20(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps -0x10(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps (TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x10(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x20(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x30(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x40(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x50(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x60(TKEYP), KEY
+	# aesenc KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	movaps 0x70(TKEYP), KEY
+	# aesenclast KEY, STATE	# last round
+	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+	ret
+
+/*
+ * _aesni_enc4:	internal ABI
+ * input:
+ *	KEYP:		key struct pointer
+ *	KLEN:		round count
+ *	STATE1:		initial state (input)
+ *	STATE2
+ *	STATE3
+ *	STATE4
+ * output:
+ *	STATE1:		finial state (output)
+ *	STATE2
+ *	STATE3
+ *	STATE4
+ * changed:
+ *	KEY
+ *	TKEYP (T1)
+ */
+_aesni_enc4:
+	movaps (KEYP), KEY		# key
+	mov KEYP, TKEYP
+	pxor KEY, STATE1		# round 0
+	pxor KEY, STATE2
+	pxor KEY, STATE3
+	pxor KEY, STATE4
+	add $0x30, TKEYP
+	cmp $24, KLEN
+	jb .L4enc128
+	lea 0x20(TKEYP), TKEYP
+	je .L4enc192
+	add $0x20, TKEYP
+	movaps -0x60(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps -0x50(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc192:
+	movaps -0x40(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps -0x30(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc128:
+	movaps -0x20(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps -0x10(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps (TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x10(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x20(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x30(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x40(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x50(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x60(TKEYP), KEY
+	# aesenc KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	# aesenc KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+	# aesenc KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+	# aesenc KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	movaps 0x70(TKEYP), KEY
+	# aesenclast KEY, STATE1	# last round
+	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+	# aesenclast KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
+	# aesenclast KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdd, 0xea
+	# aesenclast KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+	ret
+
+/*
+ * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_dec)
+	mov 480(KEYP), KLEN		# key length
+	add $240, KEYP
+	movups (INP), STATE		# input
+	call _aesni_dec1
+	movups STATE, (OUTP)		#output
+	ret
+
+/*
+ * _aesni_dec1:		internal ABI
+ * input:
+ *	KEYP:		key struct pointer
+ *	KLEN:		key length
+ *	STATE:		initial state (input)
+ * output:
+ *	STATE:		finial state (output)
+ * changed:
+ *	KEY
+ *	TKEYP (T1)
+ */
+_aesni_dec1:
+	movaps (KEYP), KEY		# key
+	mov KEYP, TKEYP
+	pxor KEY, STATE		# round 0
+	add $0x30, TKEYP
+	cmp $24, KLEN
+	jb .Ldec128
+	lea 0x20(TKEYP), TKEYP
+	je .Ldec192
+	add $0x20, TKEYP
+	movaps -0x60(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps -0x50(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec192:
+	movaps -0x40(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps -0x30(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec128:
+	movaps -0x20(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps -0x10(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps (TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x10(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x20(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x30(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x40(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x50(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x60(TKEYP), KEY
+	# aesdec KEY, STATE
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	movaps 0x70(TKEYP), KEY
+	# aesdeclast KEY, STATE		# last round
+	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+	ret
+
+/*
+ * _aesni_dec4:	internal ABI
+ * input:
+ *	KEYP:		key struct pointer
+ *	KLEN:		key length
+ *	STATE1:		initial state (input)
+ *	STATE2
+ *	STATE3
+ *	STATE4
+ * output:
+ *	STATE1:		finial state (output)
+ *	STATE2
+ *	STATE3
+ *	STATE4
+ * changed:
+ *	KEY
+ *	TKEYP (T1)
+ */
+_aesni_dec4:
+	movaps (KEYP), KEY		# key
+	mov KEYP, TKEYP
+	pxor KEY, STATE1		# round 0
+	pxor KEY, STATE2
+	pxor KEY, STATE3
+	pxor KEY, STATE4
+	add $0x30, TKEYP
+	cmp $24, KLEN
+	jb .L4dec128
+	lea 0x20(TKEYP), TKEYP
+	je .L4dec192
+	add $0x20, TKEYP
+	movaps -0x60(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps -0x50(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec192:
+	movaps -0x40(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps -0x30(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec128:
+	movaps -0x20(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps -0x10(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps (TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x10(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x20(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x30(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x40(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x50(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x60(TKEYP), KEY
+	# aesdec KEY, STATE1
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	# aesdec KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+	# aesdec KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
+	# aesdec KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	movaps 0x70(TKEYP), KEY
+	# aesdeclast KEY, STATE1	# last round
+	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+	# aesdeclast KEY, STATE2
+	.byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
+	# aesdeclast KEY, STATE3
+	.byte 0x66, 0x0f, 0x38, 0xdf, 0xea
+	# aesdeclast KEY, STATE4
+	.byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+	ret
+
+/*
+ * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *		      size_t len)
+ */
+ENTRY(aesni_ecb_enc)
+	test LEN, LEN		# check length
+	jz .Lecb_enc_ret
+	mov 480(KEYP), KLEN
+	cmp $16, LEN
+	jb .Lecb_enc_ret
+	cmp $64, LEN
+	jb .Lecb_enc_loop1
+.align 4
+.Lecb_enc_loop4:
+	movups (INP), STATE1
+	movups 0x10(INP), STATE2
+	movups 0x20(INP), STATE3
+	movups 0x30(INP), STATE4
+	call _aesni_enc4
+	movups STATE1, (OUTP)
+	movups STATE2, 0x10(OUTP)
+	movups STATE3, 0x20(OUTP)
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jge .Lecb_enc_loop4
+	cmp $16, LEN
+	jb .Lecb_enc_ret
+.align 4
+.Lecb_enc_loop1:
+	movups (INP), STATE1
+	call _aesni_enc1
+	movups STATE1, (OUTP)
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lecb_enc_loop1
+.Lecb_enc_ret:
+	ret
+
+/*
+ * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *		      size_t len);
+ */
+ENTRY(aesni_ecb_dec)
+	test LEN, LEN
+	jz .Lecb_dec_ret
+	mov 480(KEYP), KLEN
+	add $240, KEYP
+	cmp $16, LEN
+	jb .Lecb_dec_ret
+	cmp $64, LEN
+	jb .Lecb_dec_loop1
+.align 4
+.Lecb_dec_loop4:
+	movups (INP), STATE1
+	movups 0x10(INP), STATE2
+	movups 0x20(INP), STATE3
+	movups 0x30(INP), STATE4
+	call _aesni_dec4
+	movups STATE1, (OUTP)
+	movups STATE2, 0x10(OUTP)
+	movups STATE3, 0x20(OUTP)
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jge .Lecb_dec_loop4
+	cmp $16, LEN
+	jb .Lecb_dec_ret
+.align 4
+.Lecb_dec_loop1:
+	movups (INP), STATE1
+	call _aesni_dec1
+	movups STATE1, (OUTP)
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lecb_dec_loop1
+.Lecb_dec_ret:
+	ret
+
+/*
+ * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *		      size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_enc)
+	cmp $16, LEN
+	jb .Lcbc_enc_ret
+	mov 480(KEYP), KLEN
+	movups (IVP), STATE	# load iv as initial state
+.align 4
+.Lcbc_enc_loop:
+	movups (INP), IN	# load input
+	pxor IN, STATE
+	call _aesni_enc1
+	movups STATE, (OUTP)	# store output
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lcbc_enc_loop
+	movups STATE, (IVP)
+.Lcbc_enc_ret:
+	ret
+
+/*
+ * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *		      size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_dec)
+	cmp $16, LEN
+	jb .Lcbc_dec_ret
+	mov 480(KEYP), KLEN
+	add $240, KEYP
+	movups (IVP), IV
+	cmp $64, LEN
+	jb .Lcbc_dec_loop1
+.align 4
+.Lcbc_dec_loop4:
+	movups (INP), IN1
+	movaps IN1, STATE1
+	movups 0x10(INP), IN2
+	movaps IN2, STATE2
+	movups 0x20(INP), IN3
+	movaps IN3, STATE3
+	movups 0x30(INP), IN4
+	movaps IN4, STATE4
+	call _aesni_dec4
+	pxor IV, STATE1
+	pxor IN1, STATE2
+	pxor IN2, STATE3
+	pxor IN3, STATE4
+	movaps IN4, IV
+	movups STATE1, (OUTP)
+	movups STATE2, 0x10(OUTP)
+	movups STATE3, 0x20(OUTP)
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jge .Lcbc_dec_loop4
+	cmp $16, LEN
+	jb .Lcbc_dec_ret
+.align 4
+.Lcbc_dec_loop1:
+	movups (INP), IN
+	movaps IN, STATE
+	call _aesni_dec1
+	pxor IV, STATE
+	movups STATE, (OUTP)
+	movaps IN, IV
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lcbc_dec_loop1
+	movups IV, (IVP)
+.Lcbc_dec_ret:
+	ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 0000000000000000000000000000000000000000..02af0af65497a1d860052b97a25309f4a436dd2a
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
+/*
+ * Support for Intel AES-NI instructions. This file contains glue
+ * code, the real AES implementation is in intel-aes_asm.S.
+ *
+ * Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/aes.h>
+
+struct async_aes_ctx {
+	struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+#define AESNI_ALIGN	16
+#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
+
+asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+			     unsigned int key_len);
+asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
+			  const u8 *in);
+asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
+			  const u8 *in);
+asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len);
+asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len);
+asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
+
+static inline int kernel_fpu_using(void)
+{
+	if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
+		return 1;
+	return 0;
+}
+
+static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
+{
+	unsigned long addr = (unsigned long)raw_ctx;
+	unsigned long align = AESNI_ALIGN;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return (struct crypto_aes_ctx *)ALIGN(addr, align);
+}
+
+static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
+			      const u8 *in_key, unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	if (kernel_fpu_using())
+		err = crypto_aes_expand_key(ctx, in_key, key_len);
+	else {
+		kernel_fpu_begin();
+		err = aesni_set_key(ctx, in_key, key_len);
+		kernel_fpu_end();
+	}
+
+	return err;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+	if (kernel_fpu_using())
+		crypto_aes_encrypt_x86(ctx, dst, src);
+	else {
+		kernel_fpu_begin();
+		aesni_enc(ctx, dst, src);
+		kernel_fpu_end();
+	}
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+	if (kernel_fpu_using())
+		crypto_aes_decrypt_x86(ctx, dst, src);
+	else {
+		kernel_fpu_begin();
+		aesni_dec(ctx, dst, src);
+		kernel_fpu_end();
+	}
+}
+
+static struct crypto_alg aesni_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-aesni",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+	.cra_alignmask		= 0,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(aesni_alg.cra_list),
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey		= aes_set_key,
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
+		}
+	}
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_fpu_begin();
+	while ((nbytes = walk.nbytes)) {
+		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	kernel_fpu_end();
+
+	return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_fpu_begin();
+	while ((nbytes = walk.nbytes)) {
+		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	kernel_fpu_end();
+
+	return err;
+}
+
+static struct crypto_alg blk_ecb_alg = {
+	.cra_name		= "__ecb-aes-aesni",
+	.cra_driver_name	= "__driver-ecb-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+};
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_fpu_begin();
+	while ((nbytes = walk.nbytes)) {
+		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK, walk.iv);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	kernel_fpu_end();
+
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_fpu_begin();
+	while ((nbytes = walk.nbytes)) {
+		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK, walk.iv);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	kernel_fpu_end();
+
+	return err;
+}
+
+static struct crypto_alg blk_cbc_alg = {
+	.cra_name		= "__cbc-aes-aesni",
+	.cra_driver_name	= "__driver-cbc-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+};
+
+static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+			unsigned int key_len)
+{
+	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
+}
+
+static int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (kernel_fpu_using()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+		return crypto_blkcipher_crt(desc.tfm)->encrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+
+static int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (kernel_fpu_using()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+
+static void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+
+static void ablk_init_common(struct crypto_tfm *tfm,
+			     struct cryptd_ablkcipher *cryptd_tfm)
+{
+	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+}
+
+static int ablk_ecb_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_ecb_alg = {
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "ecb-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
+	.cra_init		= ablk_ecb_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+};
+
+static int ablk_cbc_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_cbc_alg = {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
+	.cra_init		= ablk_cbc_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+};
+
+static int __init aesni_init(void)
+{
+	int err;
+
+	if (!cpu_has_aes) {
+		printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+	if ((err = crypto_register_alg(&aesni_alg)))
+		goto aes_err;
+	if ((err = crypto_register_alg(&blk_ecb_alg)))
+		goto blk_ecb_err;
+	if ((err = crypto_register_alg(&blk_cbc_alg)))
+		goto blk_cbc_err;
+	if ((err = crypto_register_alg(&ablk_ecb_alg)))
+		goto ablk_ecb_err;
+	if ((err = crypto_register_alg(&ablk_cbc_alg)))
+		goto ablk_cbc_err;
+
+	return err;
+
+ablk_cbc_err:
+	crypto_unregister_alg(&ablk_ecb_alg);
+ablk_ecb_err:
+	crypto_unregister_alg(&blk_cbc_alg);
+blk_cbc_err:
+	crypto_unregister_alg(&blk_ecb_alg);
+blk_ecb_err:
+	crypto_unregister_alg(&aesni_alg);
+aes_err:
+	return err;
+}
+
+static void __exit aesni_exit(void)
+{
+	crypto_unregister_alg(&ablk_cbc_alg);
+	crypto_unregister_alg(&ablk_ecb_alg);
+	crypto_unregister_alg(&blk_cbc_alg);
+	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_alg(&aesni_alg);
+}
+
+module_init(aesni_init);
+module_exit(aesni_exit);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7301e60dc4a83c56e28b6e8867ab6a15419347bf..0beba0d1468db24bceaa1e81cdf8c652eb5180ee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
 #define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_aes		boot_cpu_has(X86_FEATURE_AES)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		boot_cpu_has(X86_FEATURE_MP)
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 8dde4fcf99c947b7c292be0a775b102e43260879..a83ce0462b6b26d6ba80614f9f5ee9001c130705 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -470,6 +470,31 @@ config CRYPTO_AES_X86_64
 
 	  See <http://csrc.nist.gov/encryption/aes/> for more information.
 
+config CRYPTO_AES_NI_INTEL
+	tristate "AES cipher algorithms (AES-NI)"
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_AES_X86_64
+	select CRYPTO_CRYPTD
+	select CRYPTO_ALGAPI
+	help
+	  Use Intel AES-NI instructions for AES algorithm.
+
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing
+	  environments regardless of its use in feedback or non-feedback
+	  modes. Its key setup time is excellent, and its key agility is
+	  good. Rijndael's very low memory requirements make it very well
+	  suited for restricted-space environments, in which it also
+	  demonstrates excellent performance. Rijndael's operations are
+	  among the easiest to defend against power and timing attacks.
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
 	select CRYPTO_ALGAPI