From a77a57a1a22afc31891d95879fe3cf2ab03838b0 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 19 Aug 2008 15:01:32 -0700
Subject: [PATCH] IPoIB: Fix deadlock on RTNL in ipoib_stop()

Commit c8c2afe3 ("IPoIB: Use rtnl lock/unlock when changing device
flags") added a call to rtnl_lock() in ipoib_mcast_join_task(), which
is run from the ipoib_workqueue.  However, ipoib_stop() (which is run
inside rtnl_lock()) flushes this workqueue, which leads to a deadlock
if the join task is pending.

Fix this by simply not flushing the workqueue from ipoib_stop().  It
turns out that we really don't care about workqueue tasks running
during or after ipoib_stop(), as long as we make sure to flush the
workqueue before unregistering a netdev.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1114>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c     | 19 +++++++++----------
 .../infiniband/ulp/ipoib/ipoib_multicast.c    | 10 +++++++++-
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f51201b17bfd..7e9e218738fa 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	/*
-	 * Now flush workqueue to make sure a scheduled task doesn't
-	 * bring our internal state back up.
-	 */
-	flush_workqueue(ipoib_workqueue);
-
-	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev, 1);
+	ipoib_ib_dev_down(dev, 0);
+	ipoib_ib_dev_stop(dev, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -1314,7 +1308,7 @@ sysfs_failed:
 
 register_failed:
 	ib_unregister_event_handler(&priv->event_handler);
-	flush_scheduled_work();
+	flush_workqueue(ipoib_workqueue);
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1373,7 +1367,12 @@ static void ipoib_remove_one(struct ib_device *device)
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
 		ib_unregister_event_handler(&priv->event_handler);
-		flush_scheduled_work();
+
+		rtnl_lock();
+		dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+		rtnl_unlock();
+
+		flush_workqueue(ipoib_workqueue);
 
 		unregister_netdev(priv->dev);
 		ipoib_dev_cleanup(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 8950e9546f4e..ac33c8f3ea85 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -392,8 +392,16 @@ static int ipoib_mcast_join_complete(int status,
 					   &priv->mcast_task, 0);
 		mutex_unlock(&mcast_mutex);
 
-		if (mcast == priv->broadcast)
+		if (mcast == priv->broadcast) {
+			/*
+			 * Take RTNL lock here to avoid racing with
+			 * ipoib_stop() and turning the carrier back
+			 * on while a device is being removed.
+			 */
+			rtnl_lock();
 			netif_carrier_on(dev);
+			rtnl_unlock();
+		}
 
 		return 0;
 	}
-- 
GitLab