diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 3a0dddca5a1097e473d103a15797547da66e7bf1..1fcf45ac81ecefd22ad23fe53aa333ffe2b6efa8 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -35,72 +35,118 @@
  * 	when CONFIG_DMA_ENGINE=n
  */
 static __always_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_device *device,
-	struct dma_chan *chan, struct page *dest, struct page **src_list,
-	unsigned int offset, unsigned int src_cnt, size_t len,
-	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
+	     unsigned int offset, int src_cnt, size_t len,
+	     enum async_tx_flags flags,
+	     struct dma_async_tx_descriptor *depend_tx,
+	     dma_async_tx_callback cb_fn, void *cb_param)
 {
-	dma_addr_t dma_dest;
+	struct dma_device *dma = chan->device;
 	dma_addr_t *dma_src = (dma_addr_t *) src_list;
-	struct dma_async_tx_descriptor *tx;
+	struct dma_async_tx_descriptor *tx = NULL;
+	int src_off = 0;
 	int i;
-	unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
-
-	pr_debug("%s: len: %zu\n", __func__, len);
-
-	dma_dest = dma_map_page(device->dev, dest, offset, len,
-				DMA_FROM_DEVICE);
+	dma_async_tx_callback _cb_fn;
+	void *_cb_param;
+	enum async_tx_flags async_flags;
+	enum dma_ctrl_flags dma_flags;
+	int xor_src_cnt;
+	dma_addr_t dma_dest;
 
+	dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_FROM_DEVICE);
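+	/* the destination is mapped once and reused across chunked
+	 * submissions; intermediate descriptors skip its unmap below
+	 */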
 	for (i = 0; i < src_cnt; i++)
-		dma_src[i] = dma_map_page(device->dev, src_list[i], offset,
+		dma_src[i] = dma_map_page(dma->dev, src_list[i], offset,
 					  len, DMA_TO_DEVICE);
 
-	/* Since we have clobbered the src_list we are committed
-	 * to doing this asynchronously.  Drivers force forward progress
-	 * in case they can not provide a descriptor
-	 */
-	tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
-					 dma_prep_flags);
-	if (!tx) {
-		if (depend_tx)
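+	/* split the operation into chunks of at most dma->max_xor sources */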
+	while (src_cnt) {
+		async_flags = flags;
+		dma_flags = 0;
+		xor_src_cnt = min(src_cnt, dma->max_xor);
+		/* if we are submitting additional xors, leave the chain open,
+		 * clear the callback parameters, and leave the destination
+		 * buffer mapped
+		 */
+		if (src_cnt > xor_src_cnt) {
+			async_flags &= ~ASYNC_TX_ACK;
+			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
+			_cb_fn = NULL;
+			_cb_param = NULL;
+		} else {
+			_cb_fn = cb_fn;
+			_cb_param = cb_param;
+		}
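+		/* an interrupt is only needed when a callback must run */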
+		if (_cb_fn)
+			dma_flags |= DMA_PREP_INTERRUPT;
+
+		/* Since we have clobbered the src_list, we are committed
+		 * to doing this asynchronously.  Drivers force forward
+		 * progress in case they cannot provide a descriptor.
+		 */
+		tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
+					      xor_src_cnt, len, dma_flags);
+
+		if (unlikely(!tx && depend_tx))
 			dma_wait_for_async_tx(depend_tx);
 
-		while (!tx)
-			tx = device->device_prep_dma_xor(chan, dma_dest,
-							 dma_src, src_cnt, len,
-							 dma_prep_flags);
-	}
+		/* spin wait for the preceding transactions to complete */
+		while (unlikely(!tx))
+			tx = dma->device_prep_dma_xor(chan, dma_dest,
+						      &dma_src[src_off],
+						      xor_src_cnt, len,
+						      dma_flags);
+
+		async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
+				_cb_param);
+
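+		/* the next chunk depends on this descriptor and may ack it */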
+		depend_tx = tx;
+		flags |= ASYNC_TX_DEP_ACK;
+
+		if (src_cnt > xor_src_cnt) {
+			/* drop completed sources */
+			src_cnt -= xor_src_cnt;
+			src_off += xor_src_cnt;
 
-	async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+			/* use the intermediate result as a source */
+			dma_src[--src_off] = dma_dest;
+			src_cnt++;
+		} else
+			break;
+	}
 
 	return tx;
 }
 
 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	unsigned int src_cnt, size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+	    int src_cnt, size_t len, enum async_tx_flags flags,
+	    struct dma_async_tx_descriptor *depend_tx,
+	    dma_async_tx_callback cb_fn, void *cb_param)
 {
-	void *_dest;
 	int i;
-
-	pr_debug("%s: len: %zu\n", __func__, len);
+	int xor_src_cnt;
+	int src_off = 0;
+	void *dest_buf;
+	void **srcs = (void **) src_list;
 
 	/* reuse the 'src_list' array to convert to buffer pointers */
 	for (i = 0; i < src_cnt; i++)
-		src_list[i] = (struct page *)
-			(page_address(src_list[i]) + offset);
+		srcs[i] = page_address(src_list[i]) + offset;
 
 	/* set destination address */
-	_dest = page_address(dest) + offset;
+	dest_buf = page_address(dest) + offset;
 
 	if (flags & ASYNC_TX_XOR_ZERO_DST)
-		memset(_dest, 0, len);
+		memset(dest_buf, 0, len);
+
+	while (src_cnt > 0) {
+		/* process up to 'MAX_XOR_BLOCKS' sources */
+		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
+		xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]);
 
-	xor_blocks(src_cnt, len, _dest,
-		(void **) src_list);
+		/* drop completed sources */
+		src_cnt -= xor_src_cnt;
+		src_off += xor_src_cnt;
+	}
 
 	async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
 }
@@ -132,106 +178,42 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
 						      &dest, 1, src_list,
 						      src_cnt, len);
-	struct dma_device *device = chan ? chan->device : NULL;
-	struct dma_async_tx_descriptor *tx = NULL;
-	dma_async_tx_callback _cb_fn;
-	void *_cb_param;
-	unsigned long local_flags;
-	int xor_src_cnt;
-	int i = 0, src_off = 0;
-
 	BUG_ON(src_cnt <= 1);
 
-	while (src_cnt) {
-		local_flags = flags;
-		if (device) { /* run the xor asynchronously */
-			xor_src_cnt = min(src_cnt, device->max_xor);
-			/* if we are submitting additional xors
-			 * only set the callback on the last transaction
-			 */
-			if (src_cnt > xor_src_cnt) {
-				local_flags &= ~ASYNC_TX_ACK;
-				_cb_fn = NULL;
-				_cb_param = NULL;
-			} else {
-				_cb_fn = cb_fn;
-				_cb_param = cb_param;
-			}
-
-			tx = do_async_xor(device, chan, dest,
-					  &src_list[src_off], offset,
-					  xor_src_cnt, len, local_flags,
-					  depend_tx, _cb_fn, _cb_param);
-		} else { /* run the xor synchronously */
-			/* in the sync case the dest is an implied source
-			 * (assumes the dest is at the src_off index)
-			 */
-			if (flags & ASYNC_TX_XOR_DROP_DST) {
-				src_cnt--;
-				src_off++;
-			}
-
-			/* process up to 'MAX_XOR_BLOCKS' sources */
-			xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
+	if (chan) {
+		/* run the xor asynchronously */
+		pr_debug("%s (async): len: %zu\n", __func__, len);
 
-			/* if we are submitting additional xors
-			 * only set the callback on the last transaction
-			 */
-			if (src_cnt > xor_src_cnt) {
-				local_flags &= ~ASYNC_TX_ACK;
-				_cb_fn = NULL;
-				_cb_param = NULL;
-			} else {
-				_cb_fn = cb_fn;
-				_cb_param = cb_param;
-			}
-
-			/* wait for any prerequisite operations */
-			if (depend_tx) {
-				/* if ack is already set then we cannot be sure
-				 * we are referring to the correct operation
-				 */
-				BUG_ON(async_tx_test_ack(depend_tx));
-				if (dma_wait_for_async_tx(depend_tx) ==
-					DMA_ERROR)
-					panic("%s: DMA_ERROR waiting for "
-						"depend_tx\n",
-						__func__);
-			}
-
-			do_sync_xor(dest, &src_list[src_off], offset,
-				xor_src_cnt, len, local_flags, depend_tx,
-				_cb_fn, _cb_param);
-		}
+		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
+				    flags, depend_tx, cb_fn, cb_param);
+	} else {
+		/* run the xor synchronously */
+		pr_debug("%s (sync): len: %zu\n", __func__, len);
 
-		/* the previous tx is hidden from the client,
-		 * so ack it
+		/* in the sync case the dest is an implied source
+		 * (assumes the dest is the first source)
 		 */
-		if (i && depend_tx)
-			async_tx_ack(depend_tx);
-
-		depend_tx = tx;
+		if (flags & ASYNC_TX_XOR_DROP_DST) {
+			src_cnt--;
+			src_list++;
+		}
 
-		if (src_cnt > xor_src_cnt) {
-			/* drop completed sources */
-			src_cnt -= xor_src_cnt;
-			src_off += xor_src_cnt;
+		/* wait for any prerequisite operations */
+		if (depend_tx) {
+			/* if ack is already set then we cannot be sure
+			 * we are referring to the correct operation
+			 */
+			BUG_ON(async_tx_test_ack(depend_tx));
+			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
+				panic("%s: DMA_ERROR waiting for depend_tx\n",
+					__func__);
+		}
 
-			/* unconditionally preserve the destination */
-			flags &= ~ASYNC_TX_XOR_ZERO_DST;
+		do_sync_xor(dest, src_list, offset, src_cnt, len,
+			    flags, depend_tx, cb_fn, cb_param);
 
-			/* use the intermediate result a source, but remember
-			 * it's dropped, because it's implied, in the sync case
-			 */
-			src_list[--src_off] = dest;
-			src_cnt++;
-			flags |= ASYNC_TX_XOR_DROP_DST;
-		} else
-			src_cnt = 0;
-		i++;
+		return NULL;
 	}
-
-	return tx;
 }
 EXPORT_SYMBOL_GPL(async_xor);