diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
index fb1b961a2e2f185143fbbea09e2e6a43606b72fb..6f77e7700d329506a453b21984f7fb15efe51b88 100644
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -77,12 +77,11 @@ static inline u8 byte(const u32 x, const unsigned n)
 struct aes_ctx
 {
 	u32 key_length;
-	u32 E[60];
-	u32 D[60];
+	u32 buf[120];
 };
 
-#define E_KEY ctx->E
-#define D_KEY ctx->D
+#define E_KEY (&ctx->buf[0])
+#define D_KEY (&ctx->buf[60])
 
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
diff --git a/crypto/aes.c b/crypto/aes.c
index 0a6a5c14368684937bf8ca9b4bd866fea06d0e15..a5017292e06633337f0e83a78a01432c09f5d4c9 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -75,12 +75,11 @@ byte(const u32 x, const unsigned n)
 
 struct aes_ctx {
 	int key_length;
-	u32 E[60];
-	u32 D[60];
+	u32 buf[120];
 };
 
-#define E_KEY ctx->E
-#define D_KEY ctx->D
+#define E_KEY (&ctx->buf[0])
+#define D_KEY (&ctx->buf[60])
 
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
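
Both copies of aes.c (the generic one above and the x86_64 one earlier in the patch) fold the separate E[60] and D[60] schedules into a single buf[120] array, with E_KEY and D_KEY becoming pointers into it, apparently so the whole key schedule sits in one contiguous block of the tfm context. A minimal userspace sketch of the resulting layout (the struct and names below are illustrative, not the kernel's):

/* Layout sketch: two 60-word schedules in one 120-word buffer;
 * E_KEY/D_KEY are simply pointers into that buffer. */
#include <stdio.h>

struct aes_ctx_like {
	unsigned int key_length;
	unsigned int buf[120];
};

int main(void)
{
	struct aes_ctx_like c;
	unsigned int *e_key = &c.buf[0];	/* was ctx->E */
	unsigned int *d_key = &c.buf[60];	/* was ctx->D */

	printf("schedules start %td words apart\n", d_key - e_key);
	return 0;
}
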
diff --git a/crypto/api.c b/crypto/api.c
index e26156f718395393bc50883bc61edc7b2a7e2d22..80bba637fba7c1d325e0718bcfa800659f46251e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -165,7 +165,7 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
 		break;
 	}
 
-	return len + alg->cra_alignmask;
+	return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
 }
 
 struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
@@ -179,12 +179,10 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 		goto out;
 
 	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
-	tfm = kmalloc(tfm_size, GFP_KERNEL);
+	tfm = kzalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
 		goto out_put;
 
-	memset(tfm, 0, tfm_size);
-	
 	tfm->__crt_alg = alg;
 	
 	if (crypto_init_flags(tfm, flags))
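
crypto_ctxsize() now pads the context allocation only by the part of cra_alignmask that the tfm's built-in context alignment does not already cover, and the kmalloc()+memset() pair collapses into kzalloc(). A rough userspace illustration of the new padding arithmetic follows; the concrete numbers are assumptions chosen for the example, not values read out of the kernel:

/* Illustration only: how much alignment slack is added to the context
 * size under the old rule (always the full mask) versus the new one
 * (only the bits the base alignment does not already provide). */
#include <stdio.h>

int main(void)
{
	unsigned int ctx_align = 8;	/* assumed crypto_tfm_ctx_alignment() */
	unsigned int alignmask = 15;	/* e.g. an algorithm wanting 16-byte alignment */
	unsigned int len = 100;		/* example cra_ctxsize */

	unsigned int old_slack = alignmask;			/* old: pad by the full mask */
	unsigned int new_slack = alignmask & ~(ctx_align - 1);	/* new: pad only the rest */

	printf("old total %u, new total %u\n", len + old_slack, len + new_slack);
	return 0;
}
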
diff --git a/crypto/deflate.c b/crypto/deflate.c
index bc73342cd1ecc253909440de480909b7eab10beb..f209368d62aefab5c1c1998f2af20159d5323d6b 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -73,12 +73,11 @@ static int deflate_decomp_init(struct deflate_ctx *ctx)
 	int ret = 0;
 	struct z_stream_s *stream = &ctx->decomp_stream;
 
-	stream->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+	stream->workspace = kzalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
 	if (!stream->workspace ) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	memset(stream->workspace, 0, zlib_inflate_workspacesize());
 	ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
 	if (ret != Z_OK) {
 		ret = -EINVAL;
diff --git a/crypto/des.c b/crypto/des.c
index 7bb548653dc68752f8facabda96dc463c0c437a2..2d74cab40c3e497c9cd356c4b8710e689bd62fb5 100644
--- a/crypto/des.c
+++ b/crypto/des.c
@@ -965,6 +965,7 @@ static struct crypto_alg des3_ede_alg = {
 	.cra_blocksize		=	DES3_EDE_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des3_ede_ctx),
 	.cra_module		=	THIS_MODULE,
+	.cra_alignmask		=	3,
 	.cra_list		=	LIST_HEAD_INIT(des3_ede_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES3_EDE_KEY_SIZE,
diff --git a/crypto/serpent.c b/crypto/serpent.c
index 52ad1a492620c09c5f0505f52285514fff68961b..e366406ab49d6ecd9e9adbf2468fe719fec66291 100644
--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -481,6 +481,7 @@ static struct crypto_alg serpent_alg = {
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct serpent_ctx),
+	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(serpent_alg.cra_list),
 	.cra_u			=	{ .cipher = {
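
des3_ede and serpent now declare .cra_alignmask = 3, i.e. they expect 4-byte aligned buffers, which matches their setkey routines reading the key as 32-bit words. A small userspace sketch of what that mask means (names are made up for the example):

/* An alignmask of 3 says "the low two address bits must be clear",
 * in other words 4-byte alignment. */
#include <stdint.h>
#include <stdio.h>

static int is_aligned(const void *p, unsigned long alignmask)
{
	return ((uintptr_t)p & alignmask) == 0;
}

int main(void)
{
	static unsigned char key[24] __attribute__ ((__aligned__(4)));

	printf("key usable with alignmask 3: %d\n", is_aligned(key, 3));
	return 0;
}
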
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 733d07ed75e91c874ff81e38255d988416be9c2a..1f683ba794ee6034ec9e2e59efa375db27288727 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -26,37 +26,38 @@
 #define MAX_IVLEN		32
 
 struct hash_testvec {
+	/* only used with keyed hash algorithms */
+	char key[128] __attribute__ ((__aligned__(4)));
 	char plaintext[128];
-	unsigned char psize;
 	char digest[MAX_DIGEST_SIZE];
-	unsigned char np;
 	unsigned char tap[MAX_TAP];
-	char key[128]; /* only used with keyed hash algorithms */
+	unsigned char psize;
+	unsigned char np;
 	unsigned char ksize;
 };
 
 struct hmac_testvec {
 	char key[128];
-	unsigned char ksize;
 	char plaintext[128];
-	unsigned char psize;
 	char digest[MAX_DIGEST_SIZE];
-	unsigned char np;
 	unsigned char tap[MAX_TAP];
+	unsigned char ksize;
+	unsigned char psize;
+	unsigned char np;
 };
 
 struct cipher_testvec {
+	char key[MAX_KEYLEN] __attribute__ ((__aligned__(4)));
+	char iv[MAX_IVLEN];
+	char input[48];
+	char result[48];
+	unsigned char tap[MAX_TAP];
+	int np;
 	unsigned char fail;
 	unsigned char wk; /* weak key flag */
-	char key[MAX_KEYLEN];
 	unsigned char klen;
-	char iv[MAX_IVLEN];
-	char input[48];
 	unsigned char ilen;
-	char result[48];
 	unsigned char rlen;
-	int np;
-	unsigned char tap[MAX_TAP];
 };
 
 struct cipher_speed {
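
The tcrypt test-vector structs are reordered so the large char arrays come first and the key buffers get an explicit 4-byte alignment attribute, matching the alignmask now declared by ciphers such as des3_ede and serpent; the small length and count fields move to the end of each struct. A compilable sketch of what the attribute buys (struct and field names invented for the example):

/* With the attribute, the key member of every statically defined test
 * vector is at least 4-byte aligned, so it can be handed straight to a
 * cipher with .cra_alignmask = 3. */
#include <stdio.h>

struct example_testvec {
	char key[16] __attribute__ ((__aligned__(4)));
	char input[16];
	unsigned char klen;
};

int main(void)
{
	struct example_testvec v;

	printf("key alignment: %zu\n", __alignof__(v.key));	/* at least 4 */
	return 0;
}
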
diff --git a/crypto/twofish.c b/crypto/twofish.c
index a26d885486fba45f65942055b8365091d988ec6e..ddfd5a3fcc5fb8ccc781189c9e4387491eb21517 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -44,6 +44,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/crypto.h>
+#include <linux/bitops.h>
 
 
 /* The large precomputed tables for the Twofish cipher (twofish.c)
@@ -542,9 +543,9 @@ static const u8 calc_sb_tbl[512] = {
 #define CALC_K(a, j, k, l, m, n) \
    x = CALC_K_2 (k, l, k, l, 0); \
    y = CALC_K_2 (m, n, m, n, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 #define CALC_K192_2(a, b, c, d, j) \
    CALC_K_2 (q0[a ^ key[(j) + 16]], \
@@ -555,9 +556,9 @@ static const u8 calc_sb_tbl[512] = {
 #define CALC_K192(a, j, k, l, m, n) \
    x = CALC_K192_2 (l, l, k, k, 0); \
    y = CALC_K192_2 (n, n, m, m, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 #define CALC_K256_2(a, b, j) \
    CALC_K192_2 (q1[b ^ key[(j) + 24]], \
@@ -568,9 +569,9 @@ static const u8 calc_sb_tbl[512] = {
 #define CALC_K256(a, j, k, l, m, n) \
    x = CALC_K256_2 (k, l, 0); \
    y = CALC_K256_2 (m, n, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 
 /* Macros to compute the g() function in the encryption and decryption
@@ -594,15 +595,15 @@ static const u8 calc_sb_tbl[512] = {
    x = G1 (a); y = G2 (b); \
    x += y; y += x + ctx->k[2 * (n) + 1]; \
    (c) ^= x + ctx->k[2 * (n)]; \
-   (c) = ((c) >> 1) + ((c) << 31); \
-   (d) = (((d) << 1)+((d) >> 31)) ^ y
+   (c) = ror32((c), 1); \
+   (d) = rol32((d), 1) ^ y
 
 #define DECROUND(n, a, b, c, d) \
    x = G1 (a); y = G2 (b); \
    x += y; y += x; \
    (d) ^= y + ctx->k[2 * (n) + 1]; \
-   (d) = ((d) >> 1) + ((d) << 31); \
-   (c) = (((c) << 1)+((c) >> 31)); \
+   (d) = ror32((d), 1); \
+   (c) = rol32((c), 1); \
    (c) ^= (x + ctx->k[2 * (n)])
 
 /* Encryption and decryption cycles; each one is simply two Feistel rounds
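
The open-coded 32-bit rotates in the key-schedule and round macros become rol32()/ror32(), which is what the new <linux/bitops.h> include at the top of the file is for. A standalone userspace check that the two spellings agree; rol32() here is a local stand-in modelled on the kernel helper, not the kernel code itself:

/* (y << 8) + (y >> 24) and rol32(y, 8) produce the same 32-bit value
 * (for non-zero shifts), so the substitution is behaviour-preserving. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static inline uint32_t rol32(uint32_t word, unsigned int shift)
{
	return (word << shift) | (word >> (32 - shift));
}

int main(void)
{
	uint32_t y = 0x12345678u;
	uint32_t open_coded = (y << 8) + (y >> 24);	/* old CALC_K form */

	printf("%08" PRIx32 " %08" PRIx32 "\n", open_coded, rol32(y, 8));
	return 0;
}
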
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 0c08c58252befe58443db9241893424dfad7ef5a..5158a9db4bc58faeabd16b44a7ed2221cbd141ed 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -284,7 +284,11 @@ aes_hw_extkey_available(uint8_t key_len)
 
 static inline struct aes_ctx *aes_ctx(void *ctx)
 {
-	return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT);
+	unsigned long align = PADLOCK_ALIGNMENT;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return (struct aes_ctx *)ALIGN((unsigned long)ctx, align);
 }
 
 static int
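
aes_ctx() now rounds the context pointer up to PADLOCK_ALIGNMENT only when the generic tfm context cannot already guarantee that alignment; otherwise align is forced to 1 and ALIGN() becomes a no-op. With the crypto_ctxsize() change above, no slack is reserved for alignment the base context already provides, and an already-aligned pointer does not need moving, so skipping the rounding is correct in that case. The arithmetic with example values (both 16s below are assumptions for the illustration):

/* Sketch only: when the assumed context alignment covers the assumed
 * PADLOCK_ALIGNMENT, the ctxsize rule reserves zero slack and ALIGN()
 * leaves an already-aligned pointer where it is. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

int main(void)
{
	unsigned int padlock_align = 16;	/* stand-in for PADLOCK_ALIGNMENT */
	unsigned int ctx_align = 16;		/* assumed crypto_tfm_ctx_alignment() */
	unsigned int slack = (padlock_align - 1) & ~(ctx_align - 1);
	uintptr_t ctx = 0x1000;			/* example, already 16-byte aligned */

	printf("slack %u, aligned ctx %#lx\n", slack,
	       (unsigned long)ALIGN_UP(ctx, padlock_align));
	return 0;
}
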
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index d88bf8aa8b4776c736ab5500679b71683f973c03..0ab1bc1152cacba6734044b3f6d7eefe5237bb6e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -229,6 +229,8 @@ struct crypto_tfm {
 	} crt_u;
 	
 	struct crypto_alg *__crt_alg;
+
+	char __crt_ctx[] __attribute__ ((__aligned__));
 };
 
 /* 
@@ -301,7 +303,13 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
 
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
-	return (void *)&tfm[1];
+	return tfm->__crt_ctx;
+}
+
+static inline unsigned int crypto_tfm_ctx_alignment(void)
+{
+	struct crypto_tfm *tfm;
+	return __alignof__(tfm->__crt_ctx);
 }
 
 /*
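
The tfm context becomes a flexible array member carrying a bare __aligned__ attribute, which requests the compiler's maximum useful alignment, so crypto_tfm_ctx() can return tfm->__crt_ctx directly and crypto_tfm_ctx_alignment() can report the guarantee that crypto_ctxsize() and padlock now rely on. A compilable userspace model of the idea (the struct below is a fake, not the kernel's crypto_tfm):

/* A flexible char array with a bare aligned attribute raises both the
 * containing struct's alignment and the offset of ctx to the
 * platform's largest useful alignment. */
#include <stdio.h>

struct fake_tfm {
	int flags;
	char ctx[] __attribute__ ((__aligned__));
};

int main(void)
{
	struct fake_tfm t;

	printf("context alignment: %zu\n", __alignof__(t.ctx));
	printf("struct size: %zu\n", sizeof(struct fake_tfm));
	return 0;
}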