diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index adcf794b22e208f8fa88847b06b466e3e1c88c75..be6d9e365a800a8569fb4915fccbfc50b0793a09 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1612,6 +1612,7 @@ _zero_cipher_left_encrypt:
         movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 
+
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
 	sub $16, %r11
 	add %r13, %r11
@@ -1634,7 +1635,9 @@ _zero_cipher_left_encrypt:
 	# GHASH computation for the last <16 byte block
 	sub	%r13, %r11
 	add	$16, %r11
-	PSHUFB_XMM %xmm10, %xmm1
+
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm0
 
 	# shuffle xmm0 back to output as ciphertext
 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e0e6340c8dad35be4b4c922b96c44300ea301879..2577613fb32b04ea65893f5a838bac0f4b317496 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -828,9 +828,15 @@ static int rfc4106_init(struct crypto_tfm *tfm)
 	struct cryptd_aead *cryptd_tfm;
 	struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
 		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
+	struct crypto_aead *cryptd_child;
+	struct aesni_rfc4106_gcm_ctx *child_ctx;
 	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
+
+	cryptd_child = cryptd_aead_child(cryptd_tfm);
+	child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child);
+	memcpy(child_ctx, ctx, sizeof(*ctx));
 	ctx->cryptd_tfm = cryptd_tfm;
 	tfm->crt_aead.reqsize = sizeof(struct aead_request)
 		+ crypto_aead_reqsize(&cryptd_tfm->base);
@@ -923,6 +929,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	int ret = 0;
 	struct crypto_tfm *tfm = crypto_aead_tfm(parent);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct aesni_rfc4106_gcm_ctx *child_ctx =
+                                 aesni_rfc4106_gcm_ctx_get(cryptd_child);
 	u8 *new_key_mem = NULL;
 
 	if (key_len < 4) {
@@ -966,6 +975,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		goto exit;
 	}
 	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+	memcpy(child_ctx, ctx, sizeof(*ctx));
 exit:
 	kfree(new_key_mem);
 	return ret;
@@ -997,7 +1007,6 @@ static int rfc4106_encrypt(struct aead_request *req)
 	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 
 	if (!irq_fpu_usable()) {
 		struct aead_request *cryptd_req =
@@ -1006,6 +1015,7 @@ static int rfc4106_encrypt(struct aead_request *req)
 		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 		return crypto_aead_encrypt(cryptd_req);
 	} else {
+		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 		kernel_fpu_begin();
 		ret = cryptd_child->base.crt_aead.encrypt(req);
 		kernel_fpu_end();
@@ -1018,7 +1028,6 @@ static int rfc4106_decrypt(struct aead_request *req)
 	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 
 	if (!irq_fpu_usable()) {
 		struct aead_request *cryptd_req =
@@ -1027,6 +1036,7 @@ static int rfc4106_decrypt(struct aead_request *req)
 		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 		return crypto_aead_decrypt(cryptd_req);
 	} else {
+		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 		kernel_fpu_begin();
 		ret = cryptd_child->base.crt_aead.decrypt(req);
 		kernel_fpu_end();