diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 5c5a4ccce76aaec20a921b395cf7028ffbed7a1c..187035560d7ff6a9817c90e824318e0dde69c5e6 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -1,10 +1,10 @@
 IP OVER INFINIBAND
 
   The ib_ipoib driver is an implementation of the IP over InfiniBand
-  protocol as specified by the latest Internet-Drafts issued by the
-  IETF ipoib working group.  It is a "native" implementation in the
-  sense of setting the interface type to ARPHRD_INFINIBAND and the
-  hardware address length to 20 (earlier proprietary implementations
+  protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
+  working group.  It is a "native" implementation in the sense of
+  setting the interface type to ARPHRD_INFINIBAND and the hardware
+  address length to 20 (earlier proprietary implementations
   masqueraded to the kernel as ethernet interfaces).
 
 Partitions and P_Keys
@@ -53,3 +53,7 @@ References
 
   IETF IP over InfiniBand (ipoib) Working Group
     http://ietf.org/html.charters/ipoib-charter.html
+  Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
+    http://ietf.org/rfc/rfc4391.txt
+  IP over InfiniBand (IPoIB) Architecture (RFC 4392)
+    http://ietf.org/rfc/rfc4392.txt
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index afc612b8577dc6c6f673fb5d2913031256c49877..ba2d6505e9a45ebaa77a64fac52631baf72e9540 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openib.org>.
 
+config INFINIBAND_ADDR_TRANS
+	bool
+	depends on INFINIBAND && INET
+	default y
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
 
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ec3353f24b27598ba4cd4400ce8b13ab29aacc6d..68e73ec2d1f87f77b1a9c327469979aa64859991 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,5 +1,7 @@
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o
+
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o
+					ib_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
 
@@ -12,8 +14,13 @@ ib_sa-y :=			sa_query.o
 
 ib_cm-y :=			cm.o
 
+rdma_cm-y :=			cma.o
+
+ib_addr-y :=			addr.o
+
 ib_umad-y :=			user_mad.o
 
 ib_ucm-y :=			ucm.o
 
-ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o
+ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o \
+				uverbs_marshall.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
new file mode 100644
index 0000000000000000000000000000000000000000..d294bbc42f091792468a265fff603c4f1916c2ee
--- /dev/null
+++ b/drivers/infiniband/core/addr.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+#include <linux/mutex.h>
+#include <linux/inetdevice.h>
+#include <linux/workqueue.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <rdma/ib_addr.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB Address Translation");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct addr_req {
+	struct list_head list;
+	struct sockaddr src_addr;
+	struct sockaddr dst_addr;
+	struct rdma_dev_addr *addr;
+	void *context;
+	void (*callback)(int status, struct sockaddr *src_addr,
+			 struct rdma_dev_addr *addr, void *context);
+	unsigned long timeout;
+	int status;
+};
+
+static void process_req(void *data);
+
+static DEFINE_MUTEX(lock);
+static LIST_HEAD(req_list);
+static DECLARE_WORK(work, process_req, NULL);
+static struct workqueue_struct *addr_wq;
+
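+/*
+ * Fill an rdma_dev_addr with the link-layer addresses of a net_device.
+ * Only InfiniBand devices are supported; anything else is rejected with
+ * -EADDRNOTAVAIL.
+ */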
+static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     unsigned char *dst_dev_addr)
+{
+	switch (dev->type) {
+	case ARPHRD_INFINIBAND:
+		dev_addr->dev_type = IB_NODE_CA;
+		break;
+	default:
+		return -EADDRNOTAVAIL;
+	}
+
+	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
+	if (dst_dev_addr)
+		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+	return 0;
+}
+
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+	struct net_device *dev;
+	u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+	int ret;
+
+	dev = ip_dev_find(ip);
+	if (!dev)
+		return -EADDRNOTAVAIL;
+
+	ret = copy_addr(dev_addr, dev, NULL);
+	dev_put(dev);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_translate_ip);
+
+static void set_timeout(unsigned long time)
+{
+	unsigned long delay;
+
+	cancel_delayed_work(&work);
+
+	delay = time - jiffies;
+	if ((long)delay <= 0)
+		delay = 1;
+
+	queue_delayed_work(addr_wq, &work, delay);
+}
+
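+/*
+ * Insert a request into req_list, keeping the list sorted by timeout.
+ * If the new request becomes the head of the list, rearm the delayed
+ * work to fire at its timeout.
+ */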
+static void queue_req(struct addr_req *req)
+{
+	struct addr_req *temp_req;
+
+	mutex_lock(&lock);
+	list_for_each_entry_reverse(temp_req, &req_list, list) {
+		if (time_after(req->timeout, temp_req->timeout))
+			break;
+	}
+
+	list_add(&req->list, &temp_req->list);
+
+	if (req_list.next == &req->list)
+		set_timeout(req->timeout);
+	mutex_unlock(&lock);
+}
+
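+/*
+ * Transmit an ARP request toward the next hop for the destination to
+ * kick off neighbour resolution.
+ */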
+static void addr_send_arp(struct sockaddr_in *dst_in)
+{
+	struct rtable *rt;
+	struct flowi fl;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+
+	memset(&fl, 0, sizeof fl);
+	fl.nl_u.ip4_u.daddr = dst_ip;
+	if (ip_route_output_key(&rt, &fl))
+		return;
+
+	arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
+		 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
+	ip_rt_put(rt);
+}
+
+static int addr_resolve_remote(struct sockaddr_in *src_in,
+			       struct sockaddr_in *dst_in,
+			       struct rdma_dev_addr *addr)
+{
+	u32 src_ip = src_in->sin_addr.s_addr;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+	struct flowi fl;
+	struct rtable *rt;
+	struct neighbour *neigh;
+	int ret;
+
+	memset(&fl, 0, sizeof fl);
+	fl.nl_u.ip4_u.daddr = dst_ip;
+	fl.nl_u.ip4_u.saddr = src_ip;
+	ret = ip_route_output_key(&rt, &fl);
+	if (ret)
+		goto out;
+
+	/* If the device does ARP internally, return 'done' */
+	if (rt->idev->dev->flags & IFF_NOARP) {
+		copy_addr(addr, rt->idev->dev, NULL);
+		goto put;
+	}
+
+	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
+	if (!neigh) {
+		ret = -ENODATA;
+		goto put;
+	}
+
+	if (!(neigh->nud_state & NUD_VALID)) {
+		ret = -ENODATA;
+		goto release;
+	}
+
+	if (!src_ip) {
+		src_in->sin_family = dst_in->sin_family;
+		src_in->sin_addr.s_addr = rt->rt_src;
+	}
+
+	ret = copy_addr(addr, neigh->dev, neigh->ha);
+release:
+	neigh_release(neigh);
+put:
+	ip_rt_put(rt);
+out:
+	return ret;
+}
+
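+/*
+ * Work handler: retry address resolution for each queued request, fail
+ * requests that have exceeded their timeout, and invoke completion
+ * callbacks after dropping the lock.
+ */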
+static void process_req(void *data)
+{
+	struct addr_req *req, *temp_req;
+	struct sockaddr_in *src_in, *dst_in;
+	struct list_head done_list;
+
+	INIT_LIST_HEAD(&done_list);
+
+	mutex_lock(&lock);
+	list_for_each_entry_safe(req, temp_req, &req_list, list) {
+		if (req->status) {
+			src_in = (struct sockaddr_in *) &req->src_addr;
+			dst_in = (struct sockaddr_in *) &req->dst_addr;
+			req->status = addr_resolve_remote(src_in, dst_in,
+							  req->addr);
+		}
+		if (req->status && time_after(jiffies, req->timeout))
+			req->status = -ETIMEDOUT;
+		else if (req->status == -ENODATA)
+			continue;
+
+		list_del(&req->list);
+		list_add_tail(&req->list, &done_list);
+	}
+
+	if (!list_empty(&req_list)) {
+		req = list_entry(req_list.next, struct addr_req, list);
+		set_timeout(req->timeout);
+	}
+	mutex_unlock(&lock);
+
+	list_for_each_entry_safe(req, temp_req, &done_list, list) {
+		list_del(&req->list);
+		req->callback(req->status, &req->src_addr, req->addr,
+			      req->context);
+		kfree(req);
+	}
+}
+
+static int addr_resolve_local(struct sockaddr_in *src_in,
+			      struct sockaddr_in *dst_in,
+			      struct rdma_dev_addr *addr)
+{
+	struct net_device *dev;
+	u32 src_ip = src_in->sin_addr.s_addr;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+	int ret;
+
+	dev = ip_dev_find(dst_ip);
+	if (!dev)
+		return -EADDRNOTAVAIL;
+
+	if (ZERONET(src_ip)) {
+		src_in->sin_family = dst_in->sin_family;
+		src_in->sin_addr.s_addr = dst_ip;
+		ret = copy_addr(addr, dev, dev->dev_addr);
+	} else if (LOOPBACK(src_ip)) {
+		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
+		if (!ret)
+			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+	} else {
+		ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
+		if (!ret)
+			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+	}
+
+	dev_put(dev);
+	return ret;
+}
+
+int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+		    struct rdma_dev_addr *addr, int timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct rdma_dev_addr *addr, void *context),
+		    void *context)
+{
+	struct sockaddr_in *src_in, *dst_in;
+	struct addr_req *req;
+	int ret = 0;
+
+	req = kmalloc(sizeof *req, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+	memset(req, 0, sizeof *req);
+
+	if (src_addr)
+		memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
+	memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+	req->addr = addr;
+	req->callback = callback;
+	req->context = context;
+
+	src_in = (struct sockaddr_in *) &req->src_addr;
+	dst_in = (struct sockaddr_in *) &req->dst_addr;
+
+	req->status = addr_resolve_local(src_in, dst_in, addr);
+	if (req->status == -EADDRNOTAVAIL)
+		req->status = addr_resolve_remote(src_in, dst_in, addr);
+
+	switch (req->status) {
+	case 0:
+		req->timeout = jiffies;
+		queue_req(req);
+		break;
+	case -ENODATA:
+		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
+		queue_req(req);
+		addr_send_arp(dst_in);
+		break;
+	default:
+		ret = req->status;
+		kfree(req);
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ip);
+
+void rdma_addr_cancel(struct rdma_dev_addr *addr)
+{
+	struct addr_req *req, *temp_req;
+
+	mutex_lock(&lock);
+	list_for_each_entry_safe(req, temp_req, &req_list, list) {
+		if (req->addr == addr) {
+			req->status = -ECANCELED;
+			req->timeout = jiffies;
+			list_del(&req->list);
+			list_add(&req->list, &req_list);
+			set_timeout(req->timeout);
+			break;
+		}
+	}
+	mutex_unlock(&lock);
+}
+EXPORT_SYMBOL(rdma_addr_cancel);
+
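+/*
+ * Any received ARP request or reply may have completed a pending
+ * resolution, so reschedule the work queue to re-check the request list.
+ */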
+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
+			 struct packet_type *pkt, struct net_device *orig_dev)
+{
+	struct arphdr *arp_hdr;
+
+	arp_hdr = (struct arphdr *) skb->nh.raw;
+
+	if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
+	    arp_hdr->ar_op == htons(ARPOP_REPLY))
+		set_timeout(jiffies);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct packet_type addr_arp = {
+	.type           = __constant_htons(ETH_P_ARP),
+	.func           = addr_arp_recv,
+	.af_packet_priv = (void *) 1,
+};
+
+static int addr_init(void)
+{
+	addr_wq = create_singlethread_workqueue("ib_addr_wq");
+	if (!addr_wq)
+		return -ENOMEM;
+
+	dev_add_pack(&addr_arp);
+	return 0;
+}
+
+static void addr_cleanup(void)
+{
+	dev_remove_pack(&addr_arp);
+	destroy_workqueue(addr_wq);
+}
+
+module_init(addr_init);
+module_exit(addr_cleanup);
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 50364c0b090c7b6a141c6bd7e47583422826c351..e05ca2cdc73f44aafd3dd807906d073b33511463 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_find_cached_pkey);
 
+int ib_get_cached_lmc(struct ib_device *device,
+		      u8                port_num,
+		      u8                *lmc)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (port_num < start_port(device) || port_num > end_port(device))
+		return -EINVAL;
+
+	read_lock_irqsave(&device->cache.lock, flags);
+	*lmc = device->cache.lmc_cache[port_num - start_port(device)];
+	read_unlock_irqrestore(&device->cache.lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_lmc);
+
 static void ib_cache_update(struct ib_device *device,
 			    u8                port)
 {
@@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device,
 	device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
 	device->cache.gid_cache [port - start_port(device)] = gid_cache;
 
+	device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
+
 	write_unlock_irq(&device->cache.lock);
 
 	kfree(old_pkey_cache);
@@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device)
 		kmalloc(sizeof *device->cache.gid_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 
-	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
+	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
+					  (end_port(device) -
+					   start_port(device) + 1),
+					  GFP_KERNEL);
+
+	if (!device->cache.pkey_cache || !device->cache.gid_cache ||
+	    !device->cache.lmc_cache) {
 		printk(KERN_WARNING "Couldn't allocate cache "
 		       "for %s\n", device->name);
 		goto err;
@@ -333,6 +359,7 @@ err_cache:
 err:
 	kfree(device->cache.pkey_cache);
 	kfree(device->cache.gid_cache);
+	kfree(device->cache.lmc_cache);
 }
 
 static void ib_cache_cleanup_one(struct ib_device *device)
@@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
 
 	kfree(device->cache.pkey_cache);
 	kfree(device->cache.gid_cache);
+	kfree(device->cache.lmc_cache);
 }
 
 static struct ib_client cache_client = {
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 86fee43502cd2a977030d48fad69810fc7f0ac0f..450adfe0a4f1c5e4ed847c340e111ca62d000cbb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $
+ * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 
 #include <linux/completion.h>
@@ -132,6 +132,7 @@ struct cm_id_private {
 	/* todo: use alternate port on send failure */
 	struct cm_av av;
 	struct cm_av alt_av;
+	struct ib_cm_compare_data *compare_data;
 
 	void *private_data;
 	__be64 tid;
@@ -253,23 +254,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
 	cm_id_priv->private_data_len = private_data_len;
 }
 
-static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num,
-			   u16 dlid, u8 sl, u16 src_path_bits)
-{
-	memset(ah_attr, 0, sizeof ah_attr);
-	ah_attr->dlid = dlid;
-	ah_attr->sl = sl;
-	ah_attr->src_path_bits = src_path_bits;
-	ah_attr->port_num = port_num;
-}
-
-static void cm_init_av_for_response(struct cm_port *port,
-				    struct ib_wc *wc, struct cm_av *av)
+static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+				    struct ib_grh *grh, struct cm_av *av)
 {
 	av->port = port;
 	av->pkey_index = wc->pkey_index;
-	cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid,
-		       wc->sl, wc->dlid_path_bits);
+	ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc,
+			   grh, &av->ah_attr);
 }
 
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
@@ -299,9 +290,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 		return ret;
 
 	av->port = port;
-	cm_set_ah_attr(&av->ah_attr, av->port->port_num,
-		       be16_to_cpu(path->dlid), path->sl,
-		       be16_to_cpu(path->slid) & 0x7F);
+	ib_init_ah_from_path(cm_dev->device, port->port_num, path,
+			     &av->ah_attr);
 	av->packet_life_time = path->packet_life_time;
 	return 0;
 }
@@ -357,6 +347,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 	return cm_id_priv;
 }
 
+static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+{
+	int i;
+
+	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
+		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
+					     ((unsigned long *) mask)[i];
+}
+
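+/*
+ * Masked comparison of the compare data attached to two listen requests:
+ * listens on the same service ID are distinguished only by the private
+ * data bytes covered by their masks.
+ */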
+static int cm_compare_data(struct ib_cm_compare_data *src_data,
+			   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+	u8 dst[IB_CM_COMPARE_SIZE];
+
+	if (!src_data || !dst_data)
+		return 0;
+
+	cm_mask_copy(src, src_data->data, dst_data->mask);
+	cm_mask_copy(dst, dst_data->data, src_data->mask);
+	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+}
+
+static int cm_compare_private_data(u8 *private_data,
+				   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+
+	if (!dst_data)
+		return 0;
+
+	cm_mask_copy(src, private_data, dst_data->mask);
+	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+}
+
 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 {
 	struct rb_node **link = &cm.listen_service_table.rb_node;
@@ -364,14 +389,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 	struct cm_id_private *cur_cm_id_priv;
 	__be64 service_id = cm_id_priv->id.service_id;
 	__be64 service_mask = cm_id_priv->id.service_mask;
+	int data_cmp;
 
 	while (*link) {
 		parent = *link;
 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 					  service_node);
+		data_cmp = cm_compare_data(cm_id_priv->compare_data,
+					   cur_cm_id_priv->compare_data);
 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
 		    (service_mask & cur_cm_id_priv->id.service_id) &&
-		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
+		    !data_cmp)
 			return cur_cm_id_priv;
 
 		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
@@ -380,6 +409,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 			link = &(*link)->rb_right;
 		else if (service_id < cur_cm_id_priv->id.service_id)
 			link = &(*link)->rb_left;
+		else if (service_id > cur_cm_id_priv->id.service_id)
+			link = &(*link)->rb_right;
+		else if (data_cmp < 0)
+			link = &(*link)->rb_left;
 		else
 			link = &(*link)->rb_right;
 	}
@@ -389,16 +422,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 }
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
-					     __be64 service_id)
+					     __be64 service_id,
+					     u8 *private_data)
 {
 	struct rb_node *node = cm.listen_service_table.rb_node;
 	struct cm_id_private *cm_id_priv;
+	int data_cmp;
 
 	while (node) {
 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+		data_cmp = cm_compare_private_data(private_data,
+						   cm_id_priv->compare_data);
 		if ((cm_id_priv->id.service_mask & service_id) ==
 		     cm_id_priv->id.service_id &&
-		    (cm_id_priv->id.device == device))
+		    (cm_id_priv->id.device == device) && !data_cmp)
 			return cm_id_priv;
 
 		if (device < cm_id_priv->id.device)
@@ -407,6 +444,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
 			node = node->rb_right;
 		else if (service_id < cm_id_priv->id.service_id)
 			node = node->rb_left;
+		else if (service_id > cm_id_priv->id.service_id)
+			node = node->rb_right;
+		else if (data_cmp < 0)
+			node = node->rb_left;
 		else
 			node = node->rb_right;
 	}
@@ -730,15 +771,14 @@ retest:
 	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
-	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
-		kfree(cm_id_priv->private_data);
+	kfree(cm_id_priv->compare_data);
+	kfree(cm_id_priv->private_data);
 	kfree(cm_id_priv);
 }
 EXPORT_SYMBOL(ib_destroy_cm_id);
 
-int ib_cm_listen(struct ib_cm_id *cm_id,
-		 __be64 service_id,
-		 __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_compare_data *compare_data)
 {
 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
 	unsigned long flags;
@@ -752,7 +792,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	BUG_ON(cm_id->state != IB_CM_IDLE);
+	if (cm_id->state != IB_CM_IDLE)
+		return -EINVAL;
+
+	if (compare_data) {
+		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
+						   GFP_KERNEL);
+		if (!cm_id_priv->compare_data)
+			return -ENOMEM;
+		cm_mask_copy(cm_id_priv->compare_data->data,
+			     compare_data->data, compare_data->mask);
+		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
+		       IB_CM_COMPARE_SIZE);
+	}
 
 	cm_id->state = IB_CM_LISTEN;
 
@@ -769,6 +821,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
 
 	if (cur_cm_id_priv) {
 		cm_id->state = IB_CM_IDLE;
+		kfree(cm_id_priv->compare_data);
+		cm_id_priv->compare_data = NULL;
 		ret = -EBUSY;
 	}
 	return ret;
@@ -1241,7 +1295,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 
 	/* Find matching listen request. */
 	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
-					   req_msg->service_id);
+					   req_msg->service_id,
+					   req_msg->private_data);
 	if (!listen_cm_id_priv) {
 		spin_unlock_irqrestore(&cm.lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1276,6 +1331,7 @@ static int cm_req_handler(struct cm_work *work)
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	cm_id_priv->id.remote_id = req_msg->local_comm_id;
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
 							    id.local_id);
@@ -2549,7 +2605,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
 	sidr_req_msg->request_id = cm_id_priv->id.local_id;
-	sidr_req_msg->pkey = cpu_to_be16(param->pkey);
+	sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
 	sidr_req_msg->service_id = param->service_id;
 
 	if (param->private_data && param->private_data_len)
@@ -2641,6 +2697,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
 	cm_id_priv->av.dgid.global.interface_id = 0;
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
 	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2654,7 +2711,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 		goto out; /* Duplicate message. */
 	}
 	cur_cm_id_priv = cm_find_listen(cm_id->device,
-					sidr_req_msg->service_id);
+					sidr_req_msg->service_id,
+					sidr_req_msg->private_data);
 	if (!cur_cm_id_priv) {
 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 		spin_unlock_irqrestore(&cm.lock, flags);
@@ -3291,7 +3349,6 @@ error:
 
 static void __exit ib_cm_cleanup(void)
 {
-	flush_workqueue(cm.wq);
 	destroy_workqueue(cm.wq);
 	ib_unregister_client(&cm_client);
 	idr_destroy(&cm.local_id_table);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
new file mode 100644
index 0000000000000000000000000000000000000000..a76834edf608be8258b239d83cfeedda6295067c
--- /dev/null
+++ b/drivers/infiniband/core/cma.c
@@ -0,0 +1,1927 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#include <linux/completion.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/idr.h>
+
+#include <net/tcp.h>
+
+#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_sa.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("Generic RDMA CM Agent");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define CMA_CM_RESPONSE_TIMEOUT 20
+#define CMA_MAX_CM_RETRIES 3
+
+static void cma_add_one(struct ib_device *device);
+static void cma_remove_one(struct ib_device *device);
+
+static struct ib_client cma_client = {
+	.name   = "cma",
+	.add    = cma_add_one,
+	.remove = cma_remove_one
+};
+
+static LIST_HEAD(dev_list);
+static LIST_HEAD(listen_any_list);
+static DEFINE_MUTEX(lock);
+static struct workqueue_struct *cma_wq;
+static DEFINE_IDR(sdp_ps);
+static DEFINE_IDR(tcp_ps);
+
+struct cma_device {
+	struct list_head	list;
+	struct ib_device	*device;
+	__be64			node_guid;
+	struct completion	comp;
+	atomic_t		refcount;
+	struct list_head	id_list;
+};
+
+enum cma_state {
+	CMA_IDLE,
+	CMA_ADDR_QUERY,
+	CMA_ADDR_RESOLVED,
+	CMA_ROUTE_QUERY,
+	CMA_ROUTE_RESOLVED,
+	CMA_CONNECT,
+	CMA_DISCONNECT,
+	CMA_ADDR_BOUND,
+	CMA_LISTEN,
+	CMA_DEVICE_REMOVAL,
+	CMA_DESTROYING
+};
+
+struct rdma_bind_list {
+	struct idr		*ps;
+	struct hlist_head	owners;
+	unsigned short		port;
+};
+
+/*
+ * Device removal can occur at any time, so we need extra handling to
+ * serialize notifying the user of device removal with other callbacks.
+ * We do this by disabling removal notification while a callback is in progress,
+ * and reporting it after the callback completes.
+ */
+struct rdma_id_private {
+	struct rdma_cm_id	id;
+
+	struct rdma_bind_list	*bind_list;
+	struct hlist_node	node;
+	struct list_head	list;
+	struct list_head	listen_list;
+	struct cma_device	*cma_dev;
+
+	enum cma_state		state;
+	spinlock_t		lock;
+	struct completion	comp;
+	atomic_t		refcount;
+	wait_queue_head_t	wait_remove;
+	atomic_t		dev_remove;
+
+	int			backlog;
+	int			timeout_ms;
+	struct ib_sa_query	*query;
+	int			query_id;
+	union {
+		struct ib_cm_id	*ib;
+	} cm_id;
+
+	u32			seq_num;
+	u32			qp_num;
+	enum ib_qp_type		qp_type;
+	u8			srq;
+};
+
+struct cma_work {
+	struct work_struct	work;
+	struct rdma_id_private	*id;
+	enum cma_state		old_state;
+	enum cma_state		new_state;
+	struct rdma_cm_event	event;
+};
+
+union cma_ip_addr {
+	struct in6_addr ip6;
+	struct {
+		__u32 pad[3];
+		__u32 addr;
+	} ip4;
+};
+
+struct cma_hdr {
+	u8 cma_version;
+	u8 ip_version;	/* IP version: 7:4 */
+	__u16 port;
+	union cma_ip_addr src_addr;
+	union cma_ip_addr dst_addr;
+};
+
+struct sdp_hh {
+	u8 bsdh[16];
+	u8 sdp_version; /* Major version: 7:4 */
+	u8 ip_version;	/* IP version: 7:4 */
+	u8 sdp_specific1[10];
+	__u16 port;
+	__u16 sdp_specific2;
+	union cma_ip_addr src_addr;
+	union cma_ip_addr dst_addr;
+};
+
+struct sdp_hah {
+	u8 bsdh[16];
+	u8 sdp_version;
+};
+
+#define CMA_VERSION 0x00
+#define SDP_MAJ_VERSION 0x2
+
+static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	ret = (id_priv->state == comp);
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+	return ret;
+}
+
+static int cma_comp_exch(struct rdma_id_private *id_priv,
+			 enum cma_state comp, enum cma_state exch)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	if ((ret = (id_priv->state == comp)))
+		id_priv->state = exch;
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+	return ret;
+}
+
+static enum cma_state cma_exch(struct rdma_id_private *id_priv,
+			       enum cma_state exch)
+{
+	unsigned long flags;
+	enum cma_state old;
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	old = id_priv->state;
+	id_priv->state = exch;
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+	return old;
+}
+
+static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
+{
+	return hdr->ip_version >> 4;
+}
+
+static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+}
+
+static inline u8 sdp_get_majv(u8 sdp_version)
+{
+	return sdp_version >> 4;
+}
+
+static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
+{
+	return hh->ip_version >> 4;
+}
+
+static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
+{
+	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
+}
+
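+/*
+ * Associate an rdma_cm_id with an RDMA device: take a device reference
+ * and add the id to the device's id list.
+ */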
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	atomic_inc(&cma_dev->refcount);
+	id_priv->cma_dev = cma_dev;
+	id_priv->id.device = cma_dev->device;
+	list_add_tail(&id_priv->list, &cma_dev->id_list);
+}
+
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+	if (atomic_dec_and_test(&cma_dev->refcount))
+		complete(&cma_dev->comp);
+}
+
+static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+{
+	list_del(&id_priv->list);
+	cma_deref_dev(id_priv->cma_dev);
+	id_priv->cma_dev = NULL;
+}
+
+static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev;
+	union ib_gid *gid;
+	int ret = -ENODEV;
+
+	gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		ret = ib_find_cached_gid(cma_dev->device, gid,
+					 &id_priv->id.port_num, NULL);
+		if (!ret) {
+			cma_attach_to_dev(id_priv, cma_dev);
+			break;
+		}
+	}
+	mutex_unlock(&lock);
+	return ret;
+}
+
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
+{
+	switch (id_priv->id.route.addr.dev_addr.dev_type) {
+	case IB_NODE_CA:
+		return cma_acquire_ib_dev(id_priv);
+	default:
+		return -ENODEV;
+	}
+}
+
+static void cma_deref_id(struct rdma_id_private *id_priv)
+{
+	if (atomic_dec_and_test(&id_priv->refcount))
+		complete(&id_priv->comp);
+}
+
+static void cma_release_remove(struct rdma_id_private *id_priv)
+{
+	if (atomic_dec_and_test(&id_priv->dev_remove))
+		wake_up(&id_priv->wait_remove);
+}
+
+struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+				  void *context, enum rdma_port_space ps)
+{
+	struct rdma_id_private *id_priv;
+
+	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
+	if (!id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	id_priv->state = CMA_IDLE;
+	id_priv->id.context = context;
+	id_priv->id.event_handler = event_handler;
+	id_priv->id.ps = ps;
+	spin_lock_init(&id_priv->lock);
+	init_completion(&id_priv->comp);
+	atomic_set(&id_priv->refcount, 1);
+	init_waitqueue_head(&id_priv->wait_remove);
+	atomic_set(&id_priv->dev_remove, 0);
+	INIT_LIST_HEAD(&id_priv->listen_list);
+	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+
+	return &id_priv->id;
+}
+EXPORT_SYMBOL(rdma_create_id);
+
+static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+	struct rdma_dev_addr *dev_addr;
+	int ret;
+
+	dev_addr = &id_priv->id.route.addr.dev_addr;
+	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+				  ib_addr_get_pkey(dev_addr),
+				  &qp_attr.pkey_index);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	qp_attr.port_num = id_priv->id.port_num;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
+					  IB_QP_PKEY_INDEX | IB_QP_PORT);
+}
+
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+		   struct ib_qp_init_attr *qp_init_attr)
+{
+	struct rdma_id_private *id_priv;
+	struct ib_qp *qp;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (id->device != pd->device)
+		return -EINVAL;
+
+	qp = ib_create_qp(pd, qp_init_attr);
+	if (IS_ERR(qp))
+		return PTR_ERR(qp);
+
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		ret = cma_init_ib_qp(id_priv, qp);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	if (ret)
+		goto err;
+
+	id->qp = qp;
+	id_priv->qp_num = qp->qp_num;
+	id_priv->qp_type = qp->qp_type;
+	id_priv->srq = (qp->srq != NULL);
+	return 0;
+err:
+	ib_destroy_qp(qp);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_create_qp);
+
+void rdma_destroy_qp(struct rdma_cm_id *id)
+{
+	ib_destroy_qp(id->qp);
+}
+EXPORT_SYMBOL(rdma_destroy_qp);
+
+static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+{
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	if (!id->qp)
+		return 0;
+
+	/* Need to update QP attributes from default values. */
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+}
+
+static int cma_modify_qp_rts(struct rdma_cm_id *id)
+{
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	if (!id->qp)
+		return 0;
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+}
+
+static int cma_modify_qp_err(struct rdma_cm_id *id)
+{
+	struct ib_qp_attr qp_attr;
+
+	if (!id->qp)
+		return 0;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+}
+
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	switch (id_priv->id.device->node_type) {
+	case IB_NODE_CA:
+		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+					 qp_attr_mask);
+		if (qp_attr->qp_state == IB_QPS_RTR)
+			qp_attr->rq_psn = id_priv->seq_num;
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(rdma_init_qp_attr);
+
+static inline int cma_zero_addr(struct sockaddr *addr)
+{
+	struct in6_addr *ip6;
+
+	if (addr->sa_family == AF_INET)
+		return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+	else {
+		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
+		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
+			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+	}
+}
+
+static inline int cma_loopback_addr(struct sockaddr *addr)
+{
+	return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+}
+
+static inline int cma_any_addr(struct sockaddr *addr)
+{
+	return cma_zero_addr(addr) || cma_loopback_addr(addr);
+}
+
+static inline int cma_any_port(struct sockaddr *addr)
+{
+	return !((struct sockaddr_in *) addr)->sin_port;
+}
+
+static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
+			    u8 *ip_ver, __u16 *port,
+			    union cma_ip_addr **src, union cma_ip_addr **dst)
+{
+	switch (ps) {
+	case RDMA_PS_SDP:
+		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
+		    SDP_MAJ_VERSION)
+			return -EINVAL;
+
+		*ip_ver	= sdp_get_ip_ver(hdr);
+		*port	= ((struct sdp_hh *) hdr)->port;
+		*src	= &((struct sdp_hh *) hdr)->src_addr;
+		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
+		break;
+	default:
+		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
+			return -EINVAL;
+
+		*ip_ver	= cma_get_ip_ver(hdr);
+		*port	= ((struct cma_hdr *) hdr)->port;
+		*src	= &((struct cma_hdr *) hdr)->src_addr;
+		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
+		break;
+	}
+
+	if (*ip_ver != 4 && *ip_ver != 6)
+		return -EINVAL;
+	return 0;
+}
+
+static void cma_save_net_info(struct rdma_addr *addr,
+			      struct rdma_addr *listen_addr,
+			      u8 ip_ver, __u16 port,
+			      union cma_ip_addr *src, union cma_ip_addr *dst)
+{
+	struct sockaddr_in *listen4, *ip4;
+	struct sockaddr_in6 *listen6, *ip6;
+
+	switch (ip_ver) {
+	case 4:
+		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
+		ip4 = (struct sockaddr_in *) &addr->src_addr;
+		ip4->sin_family = listen4->sin_family;
+		ip4->sin_addr.s_addr = dst->ip4.addr;
+		ip4->sin_port = listen4->sin_port;
+
+		ip4 = (struct sockaddr_in *) &addr->dst_addr;
+		ip4->sin_family = listen4->sin_family;
+		ip4->sin_addr.s_addr = src->ip4.addr;
+		ip4->sin_port = port;
+		break;
+	case 6:
+		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
+		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
+		ip6->sin6_family = listen6->sin6_family;
+		ip6->sin6_addr = dst->ip6;
+		ip6->sin6_port = listen6->sin6_port;
+
+		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
+		ip6->sin6_family = listen6->sin6_family;
+		ip6->sin6_addr = src->ip6;
+		ip6->sin6_port = port;
+		break;
+	default:
+		break;
+	}
+}
+
+static inline int cma_user_data_offset(enum rdma_port_space ps)
+{
+	switch (ps) {
+	case RDMA_PS_SDP:
+		return 0;
+	default:
+		return sizeof(struct cma_hdr);
+	}
+}
+
+static int cma_notify_user(struct rdma_id_private *id_priv,
+			   enum rdma_cm_event_type type, int status,
+			   void *data, u8 data_len)
+{
+	struct rdma_cm_event event;
+
+	event.event = type;
+	event.status = status;
+	event.private_data = data;
+	event.private_data_len = data_len;
+
+	return id_priv->id.event_handler(&id_priv->id, &event);
+}
+
+static void cma_cancel_route(struct rdma_id_private *id_priv)
+{
+	switch (id_priv->id.device->node_type) {
+	case IB_NODE_CA:
+		if (id_priv->query)
+			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
+		break;
+	default:
+		break;
+	}
+}
+
+static inline int cma_internal_listen(struct rdma_id_private *id_priv)
+{
+	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
+	       cma_any_addr(&id_priv->id.route.addr.src_addr);
+}
+
+static void cma_destroy_listen(struct rdma_id_private *id_priv)
+{
+	cma_exch(id_priv, CMA_DESTROYING);
+
+	if (id_priv->cma_dev) {
+		switch (id_priv->id.device->node_type) {
+		case IB_NODE_CA:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+				ib_destroy_cm_id(id_priv->cm_id.ib);
+			break;
+		default:
+			break;
+		}
+		cma_detach_from_dev(id_priv);
+	}
+	list_del(&id_priv->listen_list);
+
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
+
+	kfree(id_priv);
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+	struct rdma_id_private *dev_id_priv;
+
+	mutex_lock(&lock);
+	list_del(&id_priv->list);
+
+	while (!list_empty(&id_priv->listen_list)) {
+		dev_id_priv = list_entry(id_priv->listen_list.next,
+					 struct rdma_id_private, listen_list);
+		cma_destroy_listen(dev_id_priv);
+	}
+	mutex_unlock(&lock);
+}
+
+static void cma_cancel_operation(struct rdma_id_private *id_priv,
+				 enum cma_state state)
+{
+	switch (state) {
+	case CMA_ADDR_QUERY:
+		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
+		break;
+	case CMA_ROUTE_QUERY:
+		cma_cancel_route(id_priv);
+		break;
+	case CMA_LISTEN:
+		if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
+		    !id_priv->cma_dev)
+			cma_cancel_listens(id_priv);
+		break;
+	default:
+		break;
+	}
+}
+
+static void cma_release_port(struct rdma_id_private *id_priv)
+{
+	struct rdma_bind_list *bind_list = id_priv->bind_list;
+
+	if (!bind_list)
+		return;
+
+	mutex_lock(&lock);
+	hlist_del(&id_priv->node);
+	if (hlist_empty(&bind_list->owners)) {
+		idr_remove(bind_list->ps, bind_list->port);
+		kfree(bind_list);
+	}
+	mutex_unlock(&lock);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv;
+	enum cma_state state;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	state = cma_exch(id_priv, CMA_DESTROYING);
+	cma_cancel_operation(id_priv, state);
+
+	if (id_priv->cma_dev) {
+		switch (id->device->node_type) {
+		case IB_NODE_CA:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+				ib_destroy_cm_id(id_priv->cm_id.ib);
+			break;
+		default:
+			break;
+		}
+		mutex_lock(&lock);
+		cma_detach_from_dev(id_priv);
+		mutex_unlock(&lock);
+	}
+
+	cma_release_port(id_priv);
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
+
+	kfree(id_priv->id.route.path_rec);
+	kfree(id_priv);
+}
+EXPORT_SYMBOL(rdma_destroy_id);
+
+static int cma_rep_recv(struct rdma_id_private *id_priv)
+{
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		goto reject;
+
+	ret = cma_modify_qp_rts(&id_priv->id);
+	if (ret)
+		goto reject;
+
+	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
+	if (ret)
+		goto reject;
+
+	return 0;
+reject:
+	cma_modify_qp_err(&id_priv->id);
+	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
+		       NULL, 0, NULL, 0);
+	return ret;
+}
+
+static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
+{
+	if (id_priv->id.ps == RDMA_PS_SDP &&
+	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
+	    SDP_MAJ_VERSION)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cma_rtu_recv(struct rdma_id_private *id_priv)
+{
+	int ret;
+
+	ret = cma_modify_qp_rts(&id_priv->id);
+	if (ret)
+		goto reject;
+
+	return 0;
+reject:
+	cma_modify_qp_err(&id_priv->id);
+	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
+		       NULL, 0, NULL, 0);
+	return ret;
+}
+
+static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+{
+	struct rdma_id_private *id_priv = cm_id->context;
+	enum rdma_cm_event_type event;
+	u8 private_data_len = 0;
+	int ret = 0, status = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+	if (!cma_comp(id_priv, CMA_CONNECT))
+		goto out;
+
+	switch (ib_event->event) {
+	case IB_CM_REQ_ERROR:
+	case IB_CM_REP_ERROR:
+		event = RDMA_CM_EVENT_UNREACHABLE;
+		status = -ETIMEDOUT;
+		break;
+	case IB_CM_REP_RECEIVED:
+		status = cma_verify_rep(id_priv, ib_event->private_data);
+		if (status)
+			event = RDMA_CM_EVENT_CONNECT_ERROR;
+		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
+			status = cma_rep_recv(id_priv);
+			event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
+					 RDMA_CM_EVENT_ESTABLISHED;
+		} else
+			event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+		private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
+		break;
+	case IB_CM_RTU_RECEIVED:
+		status = cma_rtu_recv(id_priv);
+		event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
+				 RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	case IB_CM_DREQ_ERROR:
+		status = -ETIMEDOUT; /* fall through */
+	case IB_CM_DREQ_RECEIVED:
+	case IB_CM_DREP_RECEIVED:
+		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+			goto out;
+		event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IB_CM_TIMEWAIT_EXIT:
+	case IB_CM_MRA_RECEIVED:
+		/* ignore event */
+		goto out;
+	case IB_CM_REJ_RECEIVED:
+		cma_modify_qp_err(&id_priv->id);
+		status = ib_event->param.rej_rcvd.reason;
+		event = RDMA_CM_EVENT_REJECTED;
+		break;
+	default:
+		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
+		       ib_event->event);
+		goto out;
+	}
+
+	ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
+			      private_data_len);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.ib = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+out:
+	cma_release_remove(id_priv);
+	return ret;
+}
+
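+/*
+ * Build a new rdma_cm_id for an incoming connection request, copying the
+ * address information from the REQ private data and the received path
+ * records.
+ */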
+static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
+					  struct ib_cm_event *ib_event)
+{
+	struct rdma_id_private *id_priv;
+	struct rdma_cm_id *id;
+	struct rdma_route *rt;
+	union cma_ip_addr *src, *dst;
+	__u16 port;
+	u8 ip_ver;
+
+	id = rdma_create_id(listen_id->event_handler, listen_id->context,
+			    listen_id->ps);
+	if (IS_ERR(id))
+		return NULL;
+
+	rt = &id->route;
+	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
+	if (!rt->path_rec)
+		goto err;
+
+	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+			     &ip_ver, &port, &src, &dst))
+		goto err;
+
+	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+			  ip_ver, port, src, dst);
+	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+	if (rt->num_paths == 2)
+		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
+
+	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
+	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
+	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	id_priv->state = CMA_CONNECT;
+	return id_priv;
+err:
+	rdma_destroy_id(id);
+	return NULL;
+}
+
+static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+{
+	struct rdma_id_private *listen_id, *conn_id;
+	int offset, ret;
+
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
+
+	conn_id = cma_new_id(&listen_id->id, ib_event);
+	if (!conn_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	atomic_inc(&conn_id->dev_remove);
+	ret = cma_acquire_ib_dev(conn_id);
+	if (ret) {
+		ret = -ENODEV;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+		goto out;
+	}
+
+	conn_id->cm_id.ib = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_ib_handler;
+
+	offset = cma_user_data_offset(listen_id->id.ps);
+	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+			      ib_event->private_data + offset,
+			      IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		conn_id->cm_id.ib = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+out:
+	cma_release_remove(listen_id);
+	return ret;
+}
+
+static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+{
+	return cpu_to_be64(((u64)ps << 16) +
+	       be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
+}
+
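+/*
+ * Listens bound to a specific IP address share the CM service ID with
+ * wildcard listens on the same port, so encode the destination address
+ * into the private data compare mask used by ib_cm_listen().
+ */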
+static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
+				 struct ib_cm_compare_data *compare)
+{
+	struct cma_hdr *cma_data, *cma_mask;
+	struct sdp_hh *sdp_data, *sdp_mask;
+	__u32 ip4_addr;
+	struct in6_addr ip6_addr;
+
+	memset(compare, 0, sizeof *compare);
+	cma_data = (void *) compare->data;
+	cma_mask = (void *) compare->mask;
+	sdp_data = (void *) compare->data;
+	sdp_mask = (void *) compare->mask;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+		if (ps == RDMA_PS_SDP) {
+			sdp_set_ip_ver(sdp_data, 4);
+			sdp_set_ip_ver(sdp_mask, 0xF);
+			sdp_data->dst_addr.ip4.addr = ip4_addr;
+			sdp_mask->dst_addr.ip4.addr = ~0;
+		} else {
+			cma_set_ip_ver(cma_data, 4);
+			cma_set_ip_ver(cma_mask, 0xF);
+			cma_data->dst_addr.ip4.addr = ip4_addr;
+			cma_mask->dst_addr.ip4.addr = ~0;
+		}
+		break;
+	case AF_INET6:
+		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
+		if (ps == RDMA_PS_SDP) {
+			sdp_set_ip_ver(sdp_data, 6);
+			sdp_set_ip_ver(sdp_mask, 0xF);
+			sdp_data->dst_addr.ip6 = ip6_addr;
+			memset(&sdp_mask->dst_addr.ip6, 0xFF,
+			       sizeof sdp_mask->dst_addr.ip6);
+		} else {
+			cma_set_ip_ver(cma_data, 6);
+			cma_set_ip_ver(cma_mask, 0xF);
+			cma_data->dst_addr.ip6 = ip6_addr;
+			memset(&cma_mask->dst_addr.ip6, 0xFF,
+			       sizeof cma_mask->dst_addr.ip6);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int cma_ib_listen(struct rdma_id_private *id_priv)
+{
+	struct ib_cm_compare_data compare_data;
+	struct sockaddr *addr;
+	__be64 svc_id;
+	int ret;
+
+	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.ib))
+		return PTR_ERR(id_priv->cm_id.ib);
+
+	addr = &id_priv->id.route.addr.src_addr;
+	svc_id = cma_get_service_id(id_priv->id.ps, addr);
+	if (cma_any_addr(addr))
+		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
+	else {
+		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
+		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
+	}
+
+	if (ret) {
+		ib_destroy_cm_id(id_priv->cm_id.ib);
+		id_priv->cm_id.ib = NULL;
+	}
+
+	return ret;
+}
+
+static int cma_listen_handler(struct rdma_cm_id *id,
+			      struct rdma_cm_event *event)
+{
+	struct rdma_id_private *id_priv = id->context;
+
+	id->context = id_priv->id.context;
+	id->event_handler = id_priv->id.event_handler;
+	return id_priv->id.event_handler(id, event);
+}
+
+static void cma_listen_on_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	struct rdma_id_private *dev_id_priv;
+	struct rdma_cm_id *id;
+	int ret;
+
+	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
+	if (IS_ERR(id))
+		return;
+
+	dev_id_priv = container_of(id, struct rdma_id_private, id);
+
+	dev_id_priv->state = CMA_ADDR_BOUND;
+	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
+	       ip_addr_size(&id_priv->id.route.addr.src_addr));
+
+	cma_attach_to_dev(dev_id_priv, cma_dev);
+	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+
+	ret = rdma_listen(id, id_priv->backlog);
+	if (ret)
+		goto err;
+
+	return;
+err:
+	cma_destroy_listen(dev_id_priv);
+}
+
+static void cma_listen_on_all(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev;
+
+	mutex_lock(&lock);
+	list_add_tail(&id_priv->list, &listen_any_list);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		cma_listen_on_dev(id_priv, cma_dev);
+	mutex_unlock(&lock);
+}
+
+static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
+{
+	struct sockaddr_in addr_in;
+
+	memset(&addr_in, 0, sizeof addr_in);
+	addr_in.sin_family = af;
+	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
+}
+
+int rdma_listen(struct rdma_cm_id *id, int backlog)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (id_priv->state == CMA_IDLE) {
+		ret = cma_bind_any(id, AF_INET);
+		if (ret)
+			return ret;
+	}
+
+	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
+		return -EINVAL;
+
+	id_priv->backlog = backlog;
+	if (id->device) {
+		switch (id->device->node_type) {
+		case IB_NODE_CA:
+			ret = cma_ib_listen(id_priv);
+			if (ret)
+				goto err;
+			break;
+		default:
+			ret = -ENOSYS;
+			goto err;
+		}
+	} else
+		cma_listen_on_all(id_priv);
+
+	return 0;
+err:
+	id_priv->backlog = 0;
+	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_listen);
+
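+/*
+ * Completion handler for the SA path record query: record the returned
+ * path or flag a route error, then report the result from the work queue.
+ */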
+static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+			      void *context)
+{
+	struct cma_work *work = context;
+	struct rdma_route *route;
+
+	route = &work->id->id.route;
+
+	if (!status) {
+		route->num_paths = 1;
+		*route->path_rec = *path_rec;
+	} else {
+		work->old_state = CMA_ROUTE_QUERY;
+		work->new_state = CMA_ADDR_RESOLVED;
+		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+	}
+
+	queue_work(cma_wq, &work->work);
+}
+
+static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
+			      struct cma_work *work)
+{
+	struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
+	struct ib_sa_path_rec path_rec;
+
+	memset(&path_rec, 0, sizeof path_rec);
+	path_rec.sgid = *ib_addr_get_sgid(addr);
+	path_rec.dgid = *ib_addr_get_dgid(addr);
+	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
+	path_rec.numb_path = 1;
+
+	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+				id_priv->id.port_num, &path_rec,
+				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
+				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
+				timeout_ms, GFP_KERNEL,
+				cma_query_handler, work, &id_priv->query);
+
+	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
+}
+
+static void cma_work_handler(void *data)
+{
+	struct cma_work *work = data;
+	struct rdma_id_private *id_priv = work->id;
+	int destroy = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+		goto out;
+
+	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		destroy = 1;
+	}
+out:
+	cma_release_remove(id_priv);
+	cma_deref_id(id_priv);
+	if (destroy)
+		rdma_destroy_id(&id_priv->id);
+	kfree(work);
+}
+
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct rdma_route *route = &id_priv->id.route;
+	struct cma_work *work;
+	int ret;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+
+	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+	if (!route->path_rec) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	ret = cma_query_ib_route(id_priv, timeout_ms, work);
+	if (ret)
+		goto err2;
+
+	return 0;
+err2:
+	kfree(route->path_rec);
+	route->path_rec = NULL;
+err1:
+	kfree(work);
+	return ret;
+}
+
+int rdma_set_ib_paths(struct rdma_cm_id *id,
+		      struct ib_sa_path_rec *path_rec, int num_paths)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+		return -EINVAL;
+
+	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
+	if (!id->route.path_rec) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
+	id->route.num_paths = num_paths;
+	return 0;
+err:
+	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_set_ib_paths);
+
+int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+		return -EINVAL;
+
+	atomic_inc(&id_priv->refcount);
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		ret = cma_resolve_ib_route(id_priv, timeout_ms);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+	cma_deref_id(id_priv);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_route);
+
+static int cma_bind_loopback(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev;
+	struct ib_port_attr port_attr;
+	union ib_gid *gid;
+	u16 pkey;
+	int ret;
+	u8 p;
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
+			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
+			    port_attr.state == IB_PORT_ACTIVE)
+				goto port_found;
+
+	if (!list_empty(&dev_list)) {
+		p = 1;
+		cma_dev = list_entry(dev_list.next, struct cma_device, list);
+	} else {
+		ret = -ENODEV;
+		goto out;
+	}
+
+port_found:
+	gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
+	ret = ib_get_cached_gid(cma_dev->device, p, 0, gid);
+	if (ret)
+		goto out;
+
+	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
+	if (ret)
+		goto out;
+
+	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
+	id_priv->id.port_num = p;
+	cma_attach_to_dev(id_priv, cma_dev);
+out:
+	mutex_unlock(&lock);
+	return ret;
+}
+
+static void addr_handler(int status, struct sockaddr *src_addr,
+			 struct rdma_dev_addr *dev_addr, void *context)
+{
+	struct rdma_id_private *id_priv = context;
+	enum rdma_cm_event_type event;
+
+	atomic_inc(&id_priv->dev_remove);
+	if (!id_priv->cma_dev && !status)
+		status = cma_acquire_dev(id_priv);
+
+	if (status) {
+		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+			goto out;
+		event = RDMA_CM_EVENT_ADDR_ERROR;
+	} else {
+		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
+			goto out;
+		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
+		       ip_addr_size(src_addr));
+		event = RDMA_CM_EVENT_ADDR_RESOLVED;
+	}
+
+	if (cma_notify_user(id_priv, event, status, NULL, 0)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		cma_deref_id(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return;
+	}
+out:
+	cma_release_remove(id_priv);
+	cma_deref_id(id_priv);
+}
+
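+/*
+ * Address resolution when the destination is the wildcard (loopback)
+ * address: bind to a local port if needed, use our own source GID as the
+ * destination GID, fill a zero source IP from the destination, and report
+ * RDMA_CM_EVENT_ADDR_RESOLVED from the workqueue.
+ */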
+static int cma_resolve_loopback(struct rdma_id_private *id_priv)
+{
+	struct cma_work *work;
+	struct sockaddr_in *src_in, *dst_in;
+	int ret;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	if (!id_priv->cma_dev) {
+		ret = cma_bind_loopback(id_priv);
+		if (ret)
+			goto err;
+	}
+
+	ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
+			 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr));
+
+	if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
+		src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
+		dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
+		src_in->sin_family = dst_in->sin_family;
+		src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+	}
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ADDR_QUERY;
+	work->new_state = CMA_ADDR_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+err:
+	kfree(work);
+	return ret;
+}
+
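+/*
+ * Bind to the caller's source address if one was given, otherwise bind to
+ * the wildcard address in the destination's address family.
+ */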
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+			 struct sockaddr *dst_addr)
+{
+	if (src_addr && src_addr->sa_family)
+		return rdma_bind_addr(id, src_addr);
+	else
+		return cma_bind_any(id, dst_addr->sa_family);
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+		      struct sockaddr *dst_addr, int timeout_ms)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (id_priv->state == CMA_IDLE) {
+		ret = cma_bind_addr(id, src_addr, dst_addr);
+		if (ret)
+			return ret;
+	}
+
+	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+		return -EINVAL;
+
+	atomic_inc(&id_priv->refcount);
+	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
+	if (cma_any_addr(dst_addr))
+		ret = cma_resolve_loopback(id_priv);
+	else
+		ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
+				      &id->route.addr.dev_addr,
+				      timeout_ms, addr_handler, id_priv);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+	cma_deref_id(id_priv);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
+
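+/*
+ * Port management: each port in use within a port space (tcp_ps, sdp_ps)
+ * is represented by an rdma_bind_list stored in that space's idr, and
+ * every id bound to the port is chained on the bind list's owners hlist.
+ */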
+static void cma_bind_port(struct rdma_bind_list *bind_list,
+			  struct rdma_id_private *id_priv)
+{
+	struct sockaddr_in *sin;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	sin->sin_port = htons(bind_list->port);
+	id_priv->bind_list = bind_list;
+	hlist_add_head(&id_priv->node, &bind_list->owners);
+}
+
+static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
+			  unsigned short snum)
+{
+	struct rdma_bind_list *bind_list;
+	int port, start, ret;
+
+	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+	if (!bind_list)
+		return -ENOMEM;
+
+	start = snum ? snum : sysctl_local_port_range[0];
+
+	do {
+		ret = idr_get_new_above(ps, bind_list, start, &port);
+	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+	if (ret)
+		goto err;
+
+	if ((snum && port != snum) ||
+	    (!snum && port > sysctl_local_port_range[1])) {
+		idr_remove(ps, port);
+		ret = -EADDRNOTAVAIL;
+		goto err;
+	}
+
+	bind_list->ps = ps;
+	bind_list->port = (unsigned short) port;
+	cma_bind_port(bind_list, id_priv);
+	return 0;
+err:
+	kfree(bind_list);
+	return ret;
+}
+
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+	struct rdma_id_private *cur_id;
+	struct sockaddr_in *sin, *cur_sin;
+	struct rdma_bind_list *bind_list;
+	struct hlist_node *node;
+	unsigned short snum;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	snum = ntohs(sin->sin_port);
+	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	bind_list = idr_find(ps, snum);
+	if (!bind_list)
+		return cma_alloc_port(ps, id_priv, snum);
+
+	/*
+	 * We don't support binding to any address if anyone is bound to
+	 * a specific address on the same port.
+	 */
+	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
+		return -EADDRNOTAVAIL;
+
+	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
+			return -EADDRNOTAVAIL;
+
+		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
+		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+			return -EADDRINUSE;
+	}
+
+	cma_bind_port(bind_list, id_priv);
+	return 0;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+	struct idr *ps;
+	int ret;
+
+	switch (id_priv->id.ps) {
+	case RDMA_PS_SDP:
+		ps = &sdp_ps;
+		break;
+	case RDMA_PS_TCP:
+		ps = &tcp_ps;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	mutex_lock(&lock);
+	if (cma_any_port(&id_priv->id.route.addr.src_addr))
+		ret = cma_alloc_port(ps, id_priv, 0);
+	else
+		ret = cma_use_port(ps, id_priv);
+	mutex_unlock(&lock);
+
+	return ret;
+}
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	if (addr->sa_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+		return -EINVAL;
+
+	if (!cma_any_addr(addr)) {
+		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+		if (!ret)
+			ret = cma_acquire_dev(id_priv);
+		if (ret)
+			goto err;
+	}
+
+	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+	ret = cma_get_port(id_priv);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_bind_addr);
+
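+/*
+ * Build the private data header that precedes the user's data in a CM REQ:
+ * an SDP hello header for RDMA_PS_SDP, otherwise the generic cma_hdr, in
+ * both cases carrying the IPv4 source/destination addresses and the port.
+ */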
+static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
+			  struct rdma_route *route)
+{
+	struct sockaddr_in *src4, *dst4;
+	struct cma_hdr *cma_hdr;
+	struct sdp_hh *sdp_hdr;
+
+	src4 = (struct sockaddr_in *) &route->addr.src_addr;
+	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+
+	switch (ps) {
+	case RDMA_PS_SDP:
+		sdp_hdr = hdr;
+		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+			return -EINVAL;
+		sdp_set_ip_ver(sdp_hdr, 4);
+		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+		sdp_hdr->port = src4->sin_port;
+		break;
+	default:
+		cma_hdr = hdr;
+		cma_hdr->cma_version = CMA_VERSION;
+		cma_set_ip_ver(cma_hdr, 4);
+		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+		cma_hdr->port = src4->sin_port;
+		break;
+	}
+	return 0;
+}
+
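+/*
+ * Active side of an IB connection: prepend the cma/SDP header to the
+ * caller's private data, create an ib_cm id, and send a CM REQ built from
+ * the resolved path records and the caller's connection parameters.
+ */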
+static int cma_connect_ib(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct ib_cm_req_param req;
+	struct rdma_route *route;
+	void *private_data;
+	int offset, ret;
+
+	memset(&req, 0, sizeof req);
+	offset = cma_user_data_offset(id_priv->id.ps);
+	req.private_data_len = offset + conn_param->private_data_len;
+	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+	if (!private_data)
+		return -ENOMEM;
+
+	if (conn_param->private_data && conn_param->private_data_len)
+		memcpy(private_data + offset, conn_param->private_data,
+		       conn_param->private_data_len);
+
+	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.ib)) {
+		ret = PTR_ERR(id_priv->cm_id.ib);
+		goto out;
+	}
+
+	route = &id_priv->id.route;
+	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
+	if (ret)
+		goto out;
+	req.private_data = private_data;
+
+	req.primary_path = &route->path_rec[0];
+	if (route->num_paths == 2)
+		req.alternate_path = &route->path_rec[1];
+
+	req.service_id = cma_get_service_id(id_priv->id.ps,
+					    &route->addr.dst_addr);
+	req.qp_num = id_priv->qp_num;
+	req.qp_type = id_priv->qp_type;
+	req.starting_psn = id_priv->seq_num;
+	req.responder_resources = conn_param->responder_resources;
+	req.initiator_depth = conn_param->initiator_depth;
+	req.flow_control = conn_param->flow_control;
+	req.retry_count = conn_param->retry_count;
+	req.rnr_retry_count = conn_param->rnr_retry_count;
+	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
+	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
+	req.max_cm_retries = CMA_MAX_CM_RETRIES;
+	req.srq = id_priv->srq ? 1 : 0;
+
+	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
+out:
+	kfree(private_data);
+	return ret;
+}
+
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+		return -EINVAL;
+
+	if (!id->qp) {
+		id_priv->qp_num = conn_param->qp_num;
+		id_priv->qp_type = conn_param->qp_type;
+		id_priv->srq = conn_param->srq;
+	}
+
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		ret = cma_connect_ib(id_priv, conn_param);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_connect);
+
+static int cma_accept_ib(struct rdma_id_private *id_priv,
+			 struct rdma_conn_param *conn_param)
+{
+	struct ib_cm_rep_param rep;
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	memset(&rep, 0, sizeof rep);
+	rep.qp_num = id_priv->qp_num;
+	rep.starting_psn = id_priv->seq_num;
+	rep.private_data = conn_param->private_data;
+	rep.private_data_len = conn_param->private_data_len;
+	rep.responder_resources = conn_param->responder_resources;
+	rep.initiator_depth = conn_param->initiator_depth;
+	rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
+	rep.failover_accepted = 0;
+	rep.flow_control = conn_param->flow_control;
+	rep.rnr_retry_count = conn_param->rnr_retry_count;
+	rep.srq = id_priv->srq ? 1 : 0;
+
+	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
+}
+
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_CONNECT))
+		return -EINVAL;
+
+	if (!id->qp && conn_param) {
+		id_priv->qp_num = conn_param->qp_num;
+		id_priv->qp_type = conn_param->qp_type;
+		id_priv->srq = conn_param->srq;
+	}
+
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		if (conn_param)
+			ret = cma_accept_ib(id_priv, conn_param);
+		else
+			ret = cma_rep_recv(id_priv);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	if (ret)
+		goto reject;
+
+	return 0;
+reject:
+	cma_modify_qp_err(id);
+	rdma_reject(id, NULL, 0);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_accept);
+
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+		u8 private_data_len)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_CONNECT))
+		return -EINVAL;
+
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		ret = ib_send_cm_rej(id_priv->cm_id.ib,
+				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+				     private_data, private_data_len);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rdma_reject);
+
+int rdma_disconnect(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_CONNECT) &&
+	    !cma_comp(id_priv, CMA_DISCONNECT))
+		return -EINVAL;
+
+	ret = cma_modify_qp_err(id);
+	if (ret)
+		goto out;
+
+	switch (id->device->node_type) {
+	case IB_NODE_CA:
+		/* Initiate or respond to a disconnect. */
+		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
+			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+		break;
+	default:
+		break;
+	}
+out:
+	return ret;
+}
+EXPORT_SYMBOL(rdma_disconnect);
+
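+/*
+ * ib_client add callback: track the new device on dev_list and create a
+ * listener on it for every id currently listening on the wildcard address.
+ */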
+static void cma_add_one(struct ib_device *device)
+{
+	struct cma_device *cma_dev;
+	struct rdma_id_private *id_priv;
+
+	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
+	if (!cma_dev)
+		return;
+
+	cma_dev->device = device;
+	cma_dev->node_guid = device->node_guid;
+	if (!cma_dev->node_guid)
+		goto err;
+
+	init_completion(&cma_dev->comp);
+	atomic_set(&cma_dev->refcount, 1);
+	INIT_LIST_HEAD(&cma_dev->id_list);
+	ib_set_client_data(device, &cma_client, cma_dev);
+
+	mutex_lock(&lock);
+	list_add_tail(&cma_dev->list, &dev_list);
+	list_for_each_entry(id_priv, &listen_any_list, list)
+		cma_listen_on_dev(id_priv, cma_dev);
+	mutex_unlock(&lock);
+	return;
+err:
+	kfree(cma_dev);
+}
+
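+/*
+ * Device removal: move the id to CMA_DEVICE_REMOVAL, cancel any operation
+ * in progress, wait for outstanding callbacks to drain (dev_remove /
+ * wait_remove), and report RDMA_CM_EVENT_DEVICE_REMOVAL.  A nonzero return
+ * from the user's handler tells the caller to destroy the id.
+ */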
+static int cma_remove_id_dev(struct rdma_id_private *id_priv)
+{
+	enum cma_state state;
+
+	/* Record that we want to remove the device */
+	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
+	if (state == CMA_DESTROYING)
+		return 0;
+
+	cma_cancel_operation(id_priv, state);
+	wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
+
+	/* Check for destruction from another callback. */
+	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+		return 0;
+
+	return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
+			       0, NULL, 0);
+}
+
+static void cma_process_remove(struct cma_device *cma_dev)
+{
+	struct list_head remove_list;
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	INIT_LIST_HEAD(&remove_list);
+
+	mutex_lock(&lock);
+	while (!list_empty(&cma_dev->id_list)) {
+		id_priv = list_entry(cma_dev->id_list.next,
+				     struct rdma_id_private, list);
+
+		if (cma_internal_listen(id_priv)) {
+			cma_destroy_listen(id_priv);
+			continue;
+		}
+
+		list_del(&id_priv->list);
+		list_add_tail(&id_priv->list, &remove_list);
+		atomic_inc(&id_priv->refcount);
+		mutex_unlock(&lock);
+
+		ret = cma_remove_id_dev(id_priv);
+		cma_deref_id(id_priv);
+		if (ret)
+			rdma_destroy_id(&id_priv->id);
+
+		mutex_lock(&lock);
+	}
+	mutex_unlock(&lock);
+
+	cma_deref_dev(cma_dev);
+	wait_for_completion(&cma_dev->comp);
+}
+
+static void cma_remove_one(struct ib_device *device)
+{
+	struct cma_device *cma_dev;
+
+	cma_dev = ib_get_client_data(device, &cma_client);
+	if (!cma_dev)
+		return;
+
+	mutex_lock(&lock);
+	list_del(&cma_dev->list);
+	mutex_unlock(&lock);
+
+	cma_process_remove(cma_dev);
+	kfree(cma_dev);
+}
+
+static int cma_init(void)
+{
+	int ret;
+
+	cma_wq = create_singlethread_workqueue("rdma_cm_wq");
+	if (!cma_wq)
+		return -ENOMEM;
+
+	ret = ib_register_client(&cma_client);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	destroy_workqueue(cma_wq);
+	return ret;
+}
+
+static void cma_cleanup(void)
+{
+	ib_unregister_client(&cma_client);
+	destroy_workqueue(cma_wq);
+	idr_destroy(&sdp_ps);
+	idr_destroy(&tcp_ps);
+}
+
+module_init(cma_init);
+module_exit(cma_cleanup);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 838bf54458d2d2ddda812583d1fb7273179d491a..615fe9cc6c568fe9b42dffdf007fda3254c81e1e 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -54,7 +54,7 @@ enum {
 /*
  * If an FMR is not in use, then the list member will point to either
  * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
+ * remap_count < pool->max_remaps) or its pool's dirty_list (if the
  * FMR needs to be unmapped before being remapped).  In either of
  * these cases it is a bug if the ref_count is not 0.  In other words,
  * if ref_count is > 0, then the list member must not be linked into
@@ -84,6 +84,7 @@ struct ib_fmr_pool {
 
 	int                       pool_size;
 	int                       max_pages;
+	int			  max_remaps;
 	int                       dirty_watermark;
 	int                       dirty_len;
 	struct list_head          free_list;
@@ -214,8 +215,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 {
 	struct ib_device   *device;
 	struct ib_fmr_pool *pool;
+	struct ib_device_attr *attr;
 	int i;
 	int ret;
+	int max_remaps;
 
 	if (!params)
 		return ERR_PTR(-EINVAL);
@@ -228,6 +231,26 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 		return ERR_PTR(-ENOSYS);
 	}
 
+	attr = kmalloc(sizeof *attr, GFP_KERNEL);
+	if (!attr) {
+		printk(KERN_WARNING "couldn't allocate device attr struct\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = ib_query_device(device, attr);
+	if (ret) {
+		printk(KERN_WARNING "couldn't query device\n");
+		kfree(attr);
+		return ERR_PTR(ret);
+	}
+
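+	/*
+	 * Devices that don't report max_map_per_fmr keep the old
+	 * IB_FMR_MAX_REMAPS default; otherwise honor the device limit.
+	 */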
+	if (!attr->max_map_per_fmr)
+		max_remaps = IB_FMR_MAX_REMAPS;
+	else
+		max_remaps = attr->max_map_per_fmr;
+
+	kfree(attr);
+
 	pool = kmalloc(sizeof *pool, GFP_KERNEL);
 	if (!pool) {
 		printk(KERN_WARNING "couldn't allocate pool struct");
@@ -258,6 +281,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
 	pool->pool_size       = 0;
 	pool->max_pages       = params->max_pages_per_fmr;
+	pool->max_remaps      = max_remaps;
 	pool->dirty_watermark = params->dirty_watermark;
 	pool->dirty_len       = 0;
 	spin_lock_init(&pool->pool_lock);
@@ -279,7 +303,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 		struct ib_pool_fmr *fmr;
 		struct ib_fmr_attr attr = {
 			.max_pages  = params->max_pages_per_fmr,
-			.max_maps   = IB_FMR_MAX_REMAPS,
+			.max_maps   = pool->max_remaps,
 			.page_shift = params->page_shift
 		};
 
@@ -489,7 +513,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 
 	--fmr->ref_count;
 	if (!fmr->ref_count) {
-		if (fmr->remap_count < IB_FMR_MAX_REMAPS) {
+		if (fmr->remap_count < pool->max_remaps) {
 			list_add_tail(&fmr->list, &pool->free_list);
 		} else {
 			list_add_tail(&fmr->list, &pool->dirty_list);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 5ad41a64314cd5b050f67dd4980c6ba0264419a4..b38e02a5db356c2b8dfdc612032e5fd73a860397 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -34,6 +34,7 @@
  * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 #include <linux/dma-mapping.h>
+#include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
 #include "mad_rmpp.h"
@@ -45,8 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
-
-kmem_cache_t *ib_mad_cache;
+static kmem_cache_t *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
@@ -1673,20 +1673,21 @@ static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
 		rwc->recv_buf.mad->mad_hdr.mgmt_class;
 }
 
-static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
+static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
+				   struct ib_mad_send_wr_private *wr,
 				   struct ib_mad_recv_wc *rwc )
 {
 	struct ib_ah_attr attr;
 	u8 send_resp, rcv_resp;
+	union ib_gid sgid;
+	struct ib_device *device = mad_agent_priv->agent.device;
+	u8 port_num = mad_agent_priv->agent.port_num;
+	u8 lmc;
 
 	send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
 		     mad_hdr.method & IB_MGMT_METHOD_RESP;
 	rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
 
-	if (!send_resp && rcv_resp)
-		/* is request/response. GID/LIDs are both local (same). */
-		return 1;
-
 	if (send_resp == rcv_resp)
 		/* both requests, or both responses. GIDs different */
 		return 0;
@@ -1695,48 +1696,78 @@ static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
 		/* Assume not equal, to avoid false positives. */
 		return 0;
 
-	if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
-		return attr.dlid == rwc->wc->slid;
-	else if ((attr.ah_flags & IB_AH_GRH) &&
-		 (rwc->wc->wc_flags & IB_WC_GRH))
-		return memcmp(attr.grh.dgid.raw,
-			      rwc->recv_buf.grh->sgid.raw, 16) == 0;
-	else
+	if (!!(attr.ah_flags & IB_AH_GRH) !=
+	    !!(rwc->wc->wc_flags & IB_WC_GRH))
 		/* one has GID, other does not.  Assume different */
 		return 0;
+
+	if (!send_resp && rcv_resp) {
+		/* is request/response. */
+		if (!(attr.ah_flags & IB_AH_GRH)) {
+			if (ib_get_cached_lmc(device, port_num, &lmc))
+				return 0;
+			return (!lmc || !((attr.src_path_bits ^
+					   rwc->wc->dlid_path_bits) &
+					  ((1 << lmc) - 1)));
+		} else {
+			if (ib_get_cached_gid(device, port_num,
+					      attr.grh.sgid_index, &sgid))
+				return 0;
+			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
+				       16);
+		}
+	}
+
+	if (!(attr.ah_flags & IB_AH_GRH))
+		return attr.dlid == rwc->wc->slid;
+	else
+		return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+			       16);
+}
+
+static inline int is_direct(u8 class)
+{
+	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
 }
+
 struct ib_mad_send_wr_private*
 ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
-		 struct ib_mad_recv_wc *mad_recv_wc)
+		 struct ib_mad_recv_wc *wc)
 {
-	struct ib_mad_send_wr_private *mad_send_wr;
+	struct ib_mad_send_wr_private *wr;
 	struct ib_mad *mad;
 
-	mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
+	mad = (struct ib_mad *)wc->recv_buf.mad;
 
-	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
-			    agent_list) {
-		if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
-		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
-		    rcv_has_same_gid(mad_send_wr, mad_recv_wc))
-			return mad_send_wr;
+	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
+		if ((wr->tid == mad->mad_hdr.tid) &&
+		    rcv_has_same_class(wr, wc) &&
+		    /*
+		     * Don't check GID for direct routed MADs.
+		     * These might have permissive LIDs.
+		     */
+		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
+			return wr;
 	}
 
 	/*
 	 * It's possible to receive the response before we've
 	 * been notified that the send has completed
 	 */
-	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
-			    agent_list) {
-		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
-		    mad_send_wr->tid == mad->mad_hdr.tid &&
-		    mad_send_wr->timeout &&
-		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
-		    rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
+	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
+		if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
+		    wr->tid == mad->mad_hdr.tid &&
+		    wr->timeout &&
+		    rcv_has_same_class(wr, wc) &&
+		    /*
+		     * Don't check GID for direct routed MADs.
+		     * These might have permissive LIDs.
+		     */
+		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
 			/* Verify request has not been canceled */
-			return (mad_send_wr->status == IB_WC_SUCCESS) ?
-				mad_send_wr : NULL;
-		}
+			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
 	}
 	return NULL;
 }
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index b4fa28d3160fc5abd2dc076e92bfb2f81928fb62..d147f3bad2ce78f819525d5332b88212b0a452be 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -212,8 +212,6 @@ struct ib_mad_port_private {
 	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
-extern kmem_cache_t *ib_mad_cache;
-
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
 
 struct ib_mad_send_wr_private *
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 501cc054cb3b150d8af4bd3c1c6d0196560ffb65..e911c99ff8437987d0e12535a8f80bda02e8327f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,6 +47,7 @@
 
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -441,6 +442,36 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
 }
 EXPORT_SYMBOL(ib_sa_cancel_query);
 
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	u16 gid_index;
+
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = be16_to_cpu(rec->dlid);
+	ah_attr->sl = rec->sl;
+	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
+	ah_attr->port_num = port_num;
+
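+	/*
+	 * A hop limit above one means the path leaves the local subnet and
+	 * needs a GRH; look up the index of our source GID in the port's
+	 * cached GID table to build it.
+	 */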
+	if (rec->hop_limit > 1) {
+		ah_attr->ah_flags = IB_AH_GRH;
+		ah_attr->grh.dgid = rec->dgid;
+
+		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+					 &gid_index);
+		if (ret)
+			return ret;
+
+		ah_attr->grh.sgid_index    = gid_index;
+		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
+		ah_attr->grh.hop_limit     = rec->hop_limit;
+		ah_attr->grh.traffic_class = rec->traffic_class;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_path);
+
 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 {
 	unsigned long flags;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9164a09b6ccd1d0aeea7153d29c8307e3c5903ca..c1c6fda9452cc44f6b44400e00eaa7f5e42ea43a 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $
+ * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 
 #include <linux/completion.h>
@@ -50,6 +50,7 @@
 
 #include <rdma/ib_cm.h>
 #include <rdma/ib_user_cm.h>
+#include <rdma/ib_marshall.h>
 
 MODULE_AUTHOR("Libor Michalek");
 MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
@@ -63,7 +64,7 @@ struct ib_ucm_device {
 };
 
 struct ib_ucm_file {
-	struct semaphore mutex;
+	struct mutex file_mutex;
 	struct file *filp;
 	struct ib_ucm_device *device;
 
@@ -152,7 +153,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
 {
 	struct ib_ucm_event *uevent;
 
-	down(&ctx->file->mutex);
+	mutex_lock(&ctx->file->file_mutex);
 	list_del(&ctx->file_list);
 	while (!list_empty(&ctx->events)) {
 
@@ -167,7 +168,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
 
 		kfree(uevent);
 	}
-	up(&ctx->file->mutex);
+	mutex_unlock(&ctx->file->file_mutex);
 }
 
 static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
@@ -205,36 +206,6 @@ error:
 	return NULL;
 }
 
-static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
-				  struct ib_sa_path_rec	 *kpath)
-{
-	if (!kpath || !upath)
-		return;
-
-	memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid);
-	memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid);
-
-	upath->dlid             = kpath->dlid;
-	upath->slid             = kpath->slid;
-	upath->raw_traffic      = kpath->raw_traffic;
-	upath->flow_label       = kpath->flow_label;
-	upath->hop_limit        = kpath->hop_limit;
-	upath->traffic_class    = kpath->traffic_class;
-	upath->reversible       = kpath->reversible;
-	upath->numb_path        = kpath->numb_path;
-	upath->pkey             = kpath->pkey;
-	upath->sl	        = kpath->sl;
-	upath->mtu_selector     = kpath->mtu_selector;
-	upath->mtu              = kpath->mtu;
-	upath->rate_selector    = kpath->rate_selector;
-	upath->rate             = kpath->rate;
-	upath->packet_life_time = kpath->packet_life_time;
-	upath->preference       = kpath->preference;
-
-	upath->packet_life_time_selector =
-		kpath->packet_life_time_selector;
-}
-
 static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
 				 struct ib_cm_req_event_param *kreq)
 {
@@ -253,8 +224,10 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
 	ureq->srq                        = kreq->srq;
 	ureq->port			 = kreq->port;
 
-	ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
-	ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
+	ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
+	if (kreq->alternate_path)
+		ib_copy_path_rec_to_user(&ureq->alternate_path,
+					 kreq->alternate_path);
 }
 
 static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
@@ -324,8 +297,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
 		info	      = evt->param.rej_rcvd.ari;
 		break;
 	case IB_CM_LAP_RECEIVED:
-		ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path,
-				      evt->param.lap_rcvd.alternate_path);
+		ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
+					 evt->param.lap_rcvd.alternate_path);
 		uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
 		uvt->resp.present = IB_UCM_PRES_ALTERNATE;
 		break;
@@ -402,11 +375,11 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
 	if (result)
 		goto err2;
 
-	down(&ctx->file->mutex);
+	mutex_lock(&ctx->file->file_mutex);
 	list_add_tail(&uevent->file_list, &ctx->file->events);
 	list_add_tail(&uevent->ctx_list, &ctx->events);
 	wake_up_interruptible(&ctx->file->poll_wait);
-	up(&ctx->file->mutex);
+	mutex_unlock(&ctx->file->file_mutex);
 	return 0;
 
 err2:
@@ -432,7 +405,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	down(&file->mutex);
+	mutex_lock(&file->file_mutex);
 	while (list_empty(&file->events)) {
 
 		if (file->filp->f_flags & O_NONBLOCK) {
@@ -447,9 +420,9 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
 
 		prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
 
-		up(&file->mutex);
+		mutex_unlock(&file->file_mutex);
 		schedule();
-		down(&file->mutex);
+		mutex_lock(&file->file_mutex);
 
 		finish_wait(&file->poll_wait, &wait);
 	}
@@ -509,7 +482,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
 	kfree(uevent->info);
 	kfree(uevent);
 done:
-	up(&file->mutex);
+	mutex_unlock(&file->file_mutex);
 	return result;
 }
 
@@ -528,9 +501,9 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	down(&file->mutex);
+	mutex_lock(&file->file_mutex);
 	ctx = ib_ucm_ctx_alloc(file);
-	up(&file->mutex);
+	mutex_unlock(&file->file_mutex);
 	if (!ctx)
 		return -ENOMEM;
 
@@ -637,65 +610,11 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
 	return result;
 }
 
-static void ib_ucm_copy_ah_attr(struct ib_ucm_ah_attr *dest_attr,
-				struct ib_ah_attr *src_attr)
-{
-	memcpy(dest_attr->grh_dgid, src_attr->grh.dgid.raw,
-	       sizeof src_attr->grh.dgid);
-	dest_attr->grh_flow_label = src_attr->grh.flow_label;
-	dest_attr->grh_sgid_index = src_attr->grh.sgid_index;
-	dest_attr->grh_hop_limit = src_attr->grh.hop_limit;
-	dest_attr->grh_traffic_class = src_attr->grh.traffic_class;
-
-	dest_attr->dlid = src_attr->dlid;
-	dest_attr->sl = src_attr->sl;
-	dest_attr->src_path_bits = src_attr->src_path_bits;
-	dest_attr->static_rate = src_attr->static_rate;
-	dest_attr->is_global = (src_attr->ah_flags & IB_AH_GRH);
-	dest_attr->port_num = src_attr->port_num;
-}
-
-static void ib_ucm_copy_qp_attr(struct ib_ucm_init_qp_attr_resp *dest_attr,
-				struct ib_qp_attr *src_attr)
-{
-	dest_attr->cur_qp_state = src_attr->cur_qp_state;
-	dest_attr->path_mtu = src_attr->path_mtu;
-	dest_attr->path_mig_state = src_attr->path_mig_state;
-	dest_attr->qkey = src_attr->qkey;
-	dest_attr->rq_psn = src_attr->rq_psn;
-	dest_attr->sq_psn = src_attr->sq_psn;
-	dest_attr->dest_qp_num = src_attr->dest_qp_num;
-	dest_attr->qp_access_flags = src_attr->qp_access_flags;
-
-	dest_attr->max_send_wr = src_attr->cap.max_send_wr;
-	dest_attr->max_recv_wr = src_attr->cap.max_recv_wr;
-	dest_attr->max_send_sge = src_attr->cap.max_send_sge;
-	dest_attr->max_recv_sge = src_attr->cap.max_recv_sge;
-	dest_attr->max_inline_data = src_attr->cap.max_inline_data;
-
-	ib_ucm_copy_ah_attr(&dest_attr->ah_attr, &src_attr->ah_attr);
-	ib_ucm_copy_ah_attr(&dest_attr->alt_ah_attr, &src_attr->alt_ah_attr);
-
-	dest_attr->pkey_index = src_attr->pkey_index;
-	dest_attr->alt_pkey_index = src_attr->alt_pkey_index;
-	dest_attr->en_sqd_async_notify = src_attr->en_sqd_async_notify;
-	dest_attr->sq_draining = src_attr->sq_draining;
-	dest_attr->max_rd_atomic = src_attr->max_rd_atomic;
-	dest_attr->max_dest_rd_atomic = src_attr->max_dest_rd_atomic;
-	dest_attr->min_rnr_timer = src_attr->min_rnr_timer;
-	dest_attr->port_num = src_attr->port_num;
-	dest_attr->timeout = src_attr->timeout;
-	dest_attr->retry_cnt = src_attr->retry_cnt;
-	dest_attr->rnr_retry = src_attr->rnr_retry;
-	dest_attr->alt_port_num = src_attr->alt_port_num;
-	dest_attr->alt_timeout = src_attr->alt_timeout;
-}
-
 static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
 				   const char __user *inbuf,
 				   int in_len, int out_len)
 {
-	struct ib_ucm_init_qp_attr_resp resp;
+	struct ib_uverbs_qp_attr resp;
 	struct ib_ucm_init_qp_attr cmd;
 	struct ib_ucm_context *ctx;
 	struct ib_qp_attr qp_attr;
@@ -718,7 +637,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
 	if (result)
 		goto out;
 
-	ib_ucm_copy_qp_attr(&resp, &qp_attr);
+	ib_copy_qp_attr_to_user(&resp, &qp_attr);
 
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
 			 &resp, sizeof(resp)))
@@ -729,6 +648,17 @@ out:
 	return result;
 }
 
+static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
+{
+	service_id &= service_mask;
+
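+	/*
+	 * Service IDs in the RDMA CM and SDP ranges are reserved for the
+	 * kernel rdma_cm; don't let userspace CM clients listen on them.
+	 */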
+	if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
+	    ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
+		return -EINVAL;
+
+	return 0;
+}
+
 static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
 			     const char __user *inbuf,
 			     int in_len, int out_len)
@@ -744,7 +674,13 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
+	result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
+	if (result)
+		goto out;
+
+	result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask,
+			      NULL);
+out:
 	ib_ucm_ctx_put(ctx);
 	return result;
 }
@@ -793,7 +729,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
 
 static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
 {
-	struct ib_ucm_path_rec ucm_path;
+	struct ib_user_path_rec upath;
 	struct ib_sa_path_rec  *sa_path;
 
 	*path = NULL;
@@ -805,36 +741,14 @@ static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
 	if (!sa_path)
 		return -ENOMEM;
 
-	if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src,
-			   sizeof(ucm_path))) {
+	if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+			   sizeof(upath))) {
 
 		kfree(sa_path);
 		return -EFAULT;
 	}
 
-	memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid);
-	memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid);
-
-	sa_path->dlid	          = ucm_path.dlid;
-	sa_path->slid	          = ucm_path.slid;
-	sa_path->raw_traffic      = ucm_path.raw_traffic;
-	sa_path->flow_label       = ucm_path.flow_label;
-	sa_path->hop_limit        = ucm_path.hop_limit;
-	sa_path->traffic_class    = ucm_path.traffic_class;
-	sa_path->reversible       = ucm_path.reversible;
-	sa_path->numb_path        = ucm_path.numb_path;
-	sa_path->pkey             = ucm_path.pkey;
-	sa_path->sl               = ucm_path.sl;
-	sa_path->mtu_selector     = ucm_path.mtu_selector;
-	sa_path->mtu              = ucm_path.mtu;
-	sa_path->rate_selector    = ucm_path.rate_selector;
-	sa_path->rate             = ucm_path.rate;
-	sa_path->packet_life_time = ucm_path.packet_life_time;
-	sa_path->preference       = ucm_path.preference;
-
-	sa_path->packet_life_time_selector =
-		ucm_path.packet_life_time_selector;
-
+	ib_copy_path_rec_from_user(sa_path, &upath);
 	*path = sa_path;
 	return 0;
 }
@@ -1130,7 +1044,6 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
 	param.service_id       = cmd.sid;
 	param.timeout_ms       = cmd.timeout;
 	param.max_cm_retries   = cmd.max_cm_retries;
-	param.pkey             = cmd.pkey;
 
 	ctx = ib_ucm_ctx_get(file, cmd.id);
 	if (!IS_ERR(ctx)) {
@@ -1263,7 +1176,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
 	INIT_LIST_HEAD(&file->ctxs);
 	init_waitqueue_head(&file->poll_wait);
 
-	init_MUTEX(&file->mutex);
+	mutex_init(&file->file_mutex);
 
 	filp->private_data = file;
 	file->filp = filp;
@@ -1277,11 +1190,11 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
 	struct ib_ucm_file *file = filp->private_data;
 	struct ib_ucm_context *ctx;
 
-	down(&file->mutex);
+	mutex_lock(&file->file_mutex);
 	while (!list_empty(&file->ctxs)) {
 		ctx = list_entry(file->ctxs.next,
 				 struct ib_ucm_context, file_list);
-		up(&file->mutex);
+		mutex_unlock(&file->file_mutex);
 
 		mutex_lock(&ctx_id_mutex);
 		idr_remove(&ctx_id_table, ctx->id);
@@ -1291,9 +1204,9 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
 		ib_ucm_cleanup_events(ctx);
 		kfree(ctx);
 
-		down(&file->mutex);
+		mutex_lock(&file->file_mutex);
 	}
-	up(&file->mutex);
+	mutex_unlock(&file->file_mutex);
 	kfree(file);
 	return 0;
 }
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3372d67ff139eb51232f96ad287e50608e5998ef..bb9bee56a824a9ea5b1da2417a882a12aab11f16 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -132,7 +132,7 @@ struct ib_ucq_object {
 	u32			async_events_reported;
 };
 
-extern struct mutex ib_uverbs_idr_mutex;
+extern spinlock_t ib_uverbs_idr_lock;
 extern struct idr ib_uverbs_pd_idr;
 extern struct idr ib_uverbs_mr_idr;
 extern struct idr ib_uverbs_mw_idr;
@@ -141,6 +141,8 @@ extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
 
+void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
+
 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 					int is_async, int *fd);
 void ib_uverbs_release_event_file(struct kref *ref);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 9f69bd48eb1bee6ccf488f0e71a7777efd7f5f0d..76bf61e9b552424e37347b7cd52a3e92bff30ed5 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -50,6 +50,196 @@
 		(udata)->outlen = (olen);				\
 	} while (0)
 
+/*
+ * The ib_uobject locking scheme is as follows:
+ *
+ * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
+ *   needs to be held during all idr operations.  When an object is
+ *   looked up, a reference must be taken on the object's kref before
+ *   dropping this lock.
+ *
+ * - Each object also has an rwsem.  This rwsem must be held for
+ *   reading while an operation that uses the object is performed.
+ *   For example, while registering an MR, the associated PD's
+ *   uobject.mutex must be held for reading.  The rwsem must be held
+ *   for writing while initializing or destroying an object.
+ *
+ * - In addition, each object has a "live" flag.  If this flag is not
+ *   set, then lookups of the object will fail even if it is found in
+ *   the idr.  This handles a reader that blocks and does not acquire
+ *   the rwsem until after the object is destroyed.  The destroy
+ *   operation will set the live flag to 0 and then drop the rwsem;
+ *   this will allow the reader to acquire the rwsem, see that the
+ *   live flag is 0, and then drop the rwsem and its reference to the
+ *   object.  The underlying storage will not be freed until the last
+ *   reference to the object is dropped.
+ */
+
+static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
+		      struct ib_ucontext *context)
+{
+	uobj->user_handle = user_handle;
+	uobj->context     = context;
+	kref_init(&uobj->ref);
+	init_rwsem(&uobj->mutex);
+	uobj->live        = 0;
+}
+
+static void release_uobj(struct kref *kref)
+{
+	kfree(container_of(kref, struct ib_uobject, ref));
+}
+
+static void put_uobj(struct ib_uobject *uobj)
+{
+	kref_put(&uobj->ref, release_uobj);
+}
+
+static void put_uobj_read(struct ib_uobject *uobj)
+{
+	up_read(&uobj->mutex);
+	put_uobj(uobj);
+}
+
+static void put_uobj_write(struct ib_uobject *uobj)
+{
+	up_write(&uobj->mutex);
+	put_uobj(uobj);
+}
+
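+/*
+ * Insert a new uobject into an idr, retrying the idr_pre_get()/idr_get_new()
+ * two-step allocation until it stops returning -EAGAIN; the allocated id is
+ * stored in uobj->id.
+ */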
+static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
+{
+	int ret;
+
+retry:
+	if (!idr_pre_get(idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&ib_uverbs_idr_lock);
+	ret = idr_get_new(idr, uobj, &uobj->id);
+	spin_unlock(&ib_uverbs_idr_lock);
+
+	if (ret == -EAGAIN)
+		goto retry;
+
+	return ret;
+}
+
+void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+{
+	spin_lock(&ib_uverbs_idr_lock);
+	idr_remove(idr, uobj->id);
+	spin_unlock(&ib_uverbs_idr_lock);
+}
+
+static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
+					 struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj;
+
+	spin_lock(&ib_uverbs_idr_lock);
+	uobj = idr_find(idr, id);
+	if (uobj)
+		kref_get(&uobj->ref);
+	spin_unlock(&ib_uverbs_idr_lock);
+
+	return uobj;
+}
+
+static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
+					struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj;
+
+	uobj = __idr_get_uobj(idr, id, context);
+	if (!uobj)
+		return NULL;
+
+	down_read(&uobj->mutex);
+	if (!uobj->live) {
+		put_uobj_read(uobj);
+		return NULL;
+	}
+
+	return uobj;
+}
+
+static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
+					 struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj;
+
+	uobj = __idr_get_uobj(idr, id, context);
+	if (!uobj)
+		return NULL;
+
+	down_write(&uobj->mutex);
+	if (!uobj->live) {
+		put_uobj_write(uobj);
+		return NULL;
+	}
+
+	return uobj;
+}
+
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj;
+
+	uobj = idr_read_uobj(idr, id, context);
+	return uobj ? uobj->object : NULL;
+}
+
+static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+}
+
+static void put_pd_read(struct ib_pd *pd)
+{
+	put_uobj_read(pd->uobject);
+}
+
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+}
+
+static void put_cq_read(struct ib_cq *cq)
+{
+	put_uobj_read(cq->uobject);
+}
+
+static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+}
+
+static void put_ah_read(struct ib_ah *ah)
+{
+	put_uobj_read(ah->uobject);
+}
+
+static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+}
+
+static void put_qp_read(struct ib_qp *qp)
+{
+	put_uobj_read(qp->uobject);
+}
+
+static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+}
+
+static void put_srq_read(struct ib_srq *srq)
+{
+	put_uobj_read(srq->uobject);
+}
+
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      const char __user *buf,
 			      int in_len, int out_len)
@@ -80,8 +270,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
 	ucontext = ibdev->alloc_ucontext(ibdev, &udata);
-	if (IS_ERR(ucontext))
-		return PTR_ERR(file->ucontext);
+	if (IS_ERR(ucontext)) {
+		ret = PTR_ERR(ucontext);
+		goto err;
+	}
 
 	ucontext->device = ibdev;
 	INIT_LIST_HEAD(&ucontext->pd_list);
@@ -278,7 +470,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -ENOMEM;
 
-	uobj->context = file->ucontext;
+	init_uobj(uobj, 0, file->ucontext);
+	down_write(&uobj->mutex);
 
 	pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
 					    file->ucontext, &udata);
@@ -291,20 +484,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	pd->uobject = uobj;
 	atomic_set(&pd->usecnt, 0);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_up;
-	}
-
-	ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
-
-	if (ret == -EAGAIN)
-		goto retry;
+	uobj->object = pd;
+	ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
 	if (ret)
-		goto err_up;
+		goto err_idr;
 
 	memset(&resp, 0, sizeof resp);
 	resp.pd_handle = uobj->id;
@@ -312,26 +495,27 @@ retry:
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
 	mutex_lock(&file->mutex);
 	list_add_tail(&uobj->list, &file->ucontext->pd_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	uobj->live = 1;
+
+	up_write(&uobj->mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_pd_idr, uobj->id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+err_idr:
 	ib_dealloc_pd(pd);
 
 err:
-	kfree(uobj);
+	put_uobj_write(uobj);
 	return ret;
 }
 
@@ -340,37 +524,34 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
 			     int in_len, int out_len)
 {
 	struct ib_uverbs_dealloc_pd cmd;
-	struct ib_pd               *pd;
 	struct ib_uobject          *uobj;
-	int                         ret = -EINVAL;
+	int                         ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
 
-	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
-	if (!pd || pd->uobject->context != file->ucontext)
-		goto out;
+	ret = ib_dealloc_pd(uobj->object);
+	if (!ret)
+		uobj->live = 0;
 
-	uobj = pd->uobject;
+	put_uobj_write(uobj);
 
-	ret = ib_dealloc_pd(pd);
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
+	idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 
 	mutex_lock(&file->mutex);
 	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	kfree(uobj);
-
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	put_uobj(uobj);
 
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -410,7 +591,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if (!obj)
 		return -ENOMEM;
 
-	obj->uobject.context = file->ucontext;
+	init_uobj(&obj->uobject, 0, file->ucontext);
+	down_write(&obj->uobject.mutex);
 
 	/*
 	 * We ask for writable memory if any access flags other than
@@ -427,23 +609,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
 	obj->umem.virt_base = cmd.hca_va;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
-	if (!pd || pd->uobject->context != file->ucontext) {
-		ret = -EINVAL;
-		goto err_up;
-	}
-
-	if (!pd->device->reg_user_mr) {
-		ret = -ENOSYS;
-		goto err_up;
-	}
+	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+	if (!pd)
+		goto err_release;
 
 	mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
 		ret = PTR_ERR(mr);
-		goto err_up;
+		goto err_put;
 	}
 
 	mr->device  = pd->device;
@@ -452,53 +625,48 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	atomic_inc(&pd->usecnt);
 	atomic_set(&mr->usecnt, 0);
 
-	memset(&resp, 0, sizeof resp);
-	resp.lkey = mr->lkey;
-	resp.rkey = mr->rkey;
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_unreg;
-	}
-
-	ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
-
-	if (ret == -EAGAIN)
-		goto retry;
+	obj->uobject.object = mr;
+	ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
 	if (ret)
 		goto err_unreg;
 
+	memset(&resp, 0, sizeof resp);
+	resp.lkey      = mr->lkey;
+	resp.rkey      = mr->rkey;
 	resp.mr_handle = obj->uobject.id;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
+	put_pd_read(pd);
+
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	obj->uobject.live = 1;
+
+	up_write(&obj->uobject.mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
 
 err_unreg:
 	ib_dereg_mr(mr);
-	atomic_dec(&pd->usecnt);
 
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+err_put:
+	put_pd_read(pd);
 
+err_release:
 	ib_umem_release(file->device->ib_dev, &obj->umem);
 
 err_free:
-	kfree(obj);
+	put_uobj_write(&obj->uobject);
 	return ret;
 }
 
@@ -508,37 +676,40 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_dereg_mr cmd;
 	struct ib_mr             *mr;
+	struct ib_uobject	 *uobj;
 	struct ib_umem_object    *memobj;
 	int                       ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
-	if (!mr || mr->uobject->context != file->ucontext)
-		goto out;
+	uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
 
-	memobj = container_of(mr->uobject, struct ib_umem_object, uobject);
+	memobj = container_of(uobj, struct ib_umem_object, uobject);
+	mr     = uobj->object;
 
 	ret = ib_dereg_mr(mr);
+	if (!ret)
+		uobj->live = 0;
+
+	put_uobj_write(uobj);
+
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
+	idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
 	mutex_lock(&file->mutex);
-	list_del(&memobj->uobject.list);
+	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
 	ib_umem_release(file->device->ib_dev, &memobj->umem);
-	kfree(memobj);
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	put_uobj(uobj);
 
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -577,7 +748,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_cq      cmd;
 	struct ib_uverbs_create_cq_resp resp;
 	struct ib_udata                 udata;
-	struct ib_ucq_object           *uobj;
+	struct ib_ucq_object           *obj;
 	struct ib_uverbs_event_file    *ev_file = NULL;
 	struct ib_cq                   *cq;
 	int                             ret;
@@ -595,10 +766,13 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	if (cmd.comp_vector >= file->device->num_comp_vectors)
 		return -EINVAL;
 
-	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-	if (!uobj)
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
 		return -ENOMEM;
 
+	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext);
+	down_write(&obj->uobject.mutex);
+
 	if (cmd.comp_channel >= 0) {
 		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
 		if (!ev_file) {
@@ -607,72 +781,64 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 		}
 	}
 
-	uobj->uobject.user_handle   = cmd.user_handle;
-	uobj->uobject.context       = file->ucontext;
-	uobj->uverbs_file	    = file;
-	uobj->comp_events_reported  = 0;
-	uobj->async_events_reported = 0;
-	INIT_LIST_HEAD(&uobj->comp_list);
-	INIT_LIST_HEAD(&uobj->async_list);
+	obj->uverbs_file	   = file;
+	obj->comp_events_reported  = 0;
+	obj->async_events_reported = 0;
+	INIT_LIST_HEAD(&obj->comp_list);
+	INIT_LIST_HEAD(&obj->async_list);
 
 	cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
 					     file->ucontext, &udata);
 	if (IS_ERR(cq)) {
 		ret = PTR_ERR(cq);
-		goto err;
+		goto err_file;
 	}
 
 	cq->device        = file->device->ib_dev;
-	cq->uobject       = &uobj->uobject;
+	cq->uobject       = &obj->uobject;
 	cq->comp_handler  = ib_uverbs_comp_handler;
 	cq->event_handler = ib_uverbs_cq_event_handler;
 	cq->cq_context    = ev_file;
 	atomic_set(&cq->usecnt, 0);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_up;
-	}
-
-	ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
-
-	if (ret == -EAGAIN)
-		goto retry;
+	obj->uobject.object = cq;
+	ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 	if (ret)
-		goto err_up;
+		goto err_free;
 
 	memset(&resp, 0, sizeof resp);
-	resp.cq_handle = uobj->uobject.id;
+	resp.cq_handle = obj->uobject.id;
 	resp.cqe       = cq->cqe;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
+	list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	obj->uobject.live = 1;
+
+	up_write(&obj->uobject.mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
+
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+err_free:
 	ib_destroy_cq(cq);
 
-err:
+err_file:
 	if (ev_file)
-		ib_uverbs_release_ucq(file, ev_file, uobj);
-	kfree(uobj);
+		ib_uverbs_release_ucq(file, ev_file, obj);
+
+err:
+	put_uobj_write(&obj->uobject);
 	return ret;
 }
 
@@ -693,11 +859,9 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
-	if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq)
-		goto out;
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	if (!cq)
+		return -EINVAL;
 
 	ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
 	if (ret)
@@ -711,7 +875,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		ret = -EFAULT;
 
 out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	put_cq_read(cq);
 
 	return ret ? ret : in_len;
 }
@@ -722,6 +886,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
+	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@@ -742,15 +907,17 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		goto out_wc;
 	}
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
-	if (!cq || cq->uobject->context != file->ucontext) {
+	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
+	if (!uobj) {
 		ret = -EINVAL;
 		goto out;
 	}
+	cq = uobj->object;
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
+	put_uobj_read(uobj);
+
 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
 		resp->wc[i].status 	   = wc[i].status;
@@ -772,7 +939,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		ret = -EFAULT;
 
 out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
 	kfree(resp);
 
 out_wc:
@@ -785,22 +951,23 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
+	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
-	int                            ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
-	if (cq && cq->uobject->context == file->ucontext) {
-		ib_req_notify_cq(cq, cmd.solicited_only ?
-					IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
-		ret = in_len;
-	}
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+	cq = uobj->object;
 
-	return ret;
+	ib_req_notify_cq(cq, cmd.solicited_only ?
+			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+
+	put_uobj_read(uobj);
+
+	return in_len;
 }
 
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
@@ -809,52 +976,50 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_destroy_cq      cmd;
 	struct ib_uverbs_destroy_cq_resp resp;
+	struct ib_uobject		*uobj;
 	struct ib_cq               	*cq;
-	struct ib_ucq_object        	*uobj;
+	struct ib_ucq_object        	*obj;
 	struct ib_uverbs_event_file	*ev_file;
-	u64				 user_handle;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	memset(&resp, 0, sizeof resp);
-
-	mutex_lock(&ib_uverbs_idr_mutex);
+	uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+	cq      = uobj->object;
+	ev_file = cq->cq_context;
+	obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
 
-	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
-	if (!cq || cq->uobject->context != file->ucontext)
-		goto out;
+	ret = ib_destroy_cq(cq);
+	if (!ret)
+		uobj->live = 0;
 
-	user_handle = cq->uobject->user_handle;
-	uobj        = container_of(cq->uobject, struct ib_ucq_object, uobject);
-	ev_file     = cq->cq_context;
+	put_uobj_write(uobj);
 
-	ret = ib_destroy_cq(cq);
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
+	idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 
 	mutex_lock(&file->mutex);
-	list_del(&uobj->uobject.list);
+	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_uverbs_release_ucq(file, ev_file, uobj);
+	ib_uverbs_release_ucq(file, ev_file, obj);
 
-	resp.comp_events_reported  = uobj->comp_events_reported;
-	resp.async_events_reported = uobj->async_events_reported;
+	memset(&resp, 0, sizeof resp);
+	resp.comp_events_reported  = obj->comp_events_reported;
+	resp.async_events_reported = obj->async_events_reported;
 
-	kfree(uobj);
+	put_uobj(uobj);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
-		ret = -EFAULT;
-
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+		return -EFAULT;
 
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
@@ -864,7 +1029,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_qp      cmd;
 	struct ib_uverbs_create_qp_resp resp;
 	struct ib_udata                 udata;
-	struct ib_uqp_object           *uobj;
+	struct ib_uqp_object           *obj;
 	struct ib_pd                   *pd;
 	struct ib_cq                   *scq, *rcq;
 	struct ib_srq                  *srq;
@@ -882,23 +1047,21 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-	if (!uobj)
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
 		return -ENOMEM;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext);
+	down_write(&obj->uevent.uobject.mutex);
 
-	pd  = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
-	scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
-	rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
-	srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL;
+	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 
-	if (!pd  || pd->uobject->context  != file->ucontext ||
-	    !scq || scq->uobject->context != file->ucontext ||
-	    !rcq || rcq->uobject->context != file->ucontext ||
-	    (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) {
+	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
-		goto err_up;
+		goto err_put;
 	}
 
 	attr.event_handler = ib_uverbs_qp_event_handler;
@@ -915,16 +1078,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	attr.cap.max_recv_sge    = cmd.max_recv_sge;
 	attr.cap.max_inline_data = cmd.max_inline_data;
 
-	uobj->uevent.uobject.user_handle = cmd.user_handle;
-	uobj->uevent.uobject.context     = file->ucontext;
-	uobj->uevent.events_reported     = 0;
-	INIT_LIST_HEAD(&uobj->uevent.event_list);
-	INIT_LIST_HEAD(&uobj->mcast_list);
+	obj->uevent.events_reported     = 0;
+	INIT_LIST_HEAD(&obj->uevent.event_list);
+	INIT_LIST_HEAD(&obj->mcast_list);
 
 	qp = pd->device->create_qp(pd, &attr, &udata);
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
-		goto err_up;
+		goto err_put;
 	}
 
 	qp->device     	  = pd->device;
@@ -932,7 +1093,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	qp->send_cq    	  = attr.send_cq;
 	qp->recv_cq    	  = attr.recv_cq;
 	qp->srq	       	  = attr.srq;
-	qp->uobject       = &uobj->uevent.uobject;
+	qp->uobject       = &obj->uevent.uobject;
 	qp->event_handler = attr.event_handler;
 	qp->qp_context    = attr.qp_context;
 	qp->qp_type	  = attr.qp_type;
@@ -942,23 +1103,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	if (attr.srq)
 		atomic_inc(&attr.srq->usecnt);
 
-	memset(&resp, 0, sizeof resp);
-	resp.qpn = qp->qp_num;
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_destroy;
-	}
-
-	ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id);
-
-	if (ret == -EAGAIN)
-		goto retry;
+	obj->uevent.uobject.object = qp;
+	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
 	if (ret)
 		goto err_destroy;
 
-	resp.qp_handle       = uobj->uevent.uobject.id;
+	memset(&resp, 0, sizeof resp);
+	resp.qpn             = qp->qp_num;
+	resp.qp_handle       = obj->uevent.uobject.id;
 	resp.max_recv_sge    = attr.cap.max_recv_sge;
 	resp.max_send_sge    = attr.cap.max_send_sge;
 	resp.max_recv_wr     = attr.cap.max_recv_wr;
@@ -968,32 +1120,42 @@ retry:
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
+	put_pd_read(pd);
+	put_cq_read(scq);
+	put_cq_read(rcq);
+	if (srq)
+		put_srq_read(srq);
+
 	mutex_lock(&file->mutex);
-	list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list);
+	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	obj->uevent.uobject.live = 1;
+
+	up_write(&obj->uevent.uobject.mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
 
 err_destroy:
 	ib_destroy_qp(qp);
-	atomic_dec(&pd->usecnt);
-	atomic_dec(&attr.send_cq->usecnt);
-	atomic_dec(&attr.recv_cq->usecnt);
-	if (attr.srq)
-		atomic_dec(&attr.srq->usecnt);
-
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
 
-	kfree(uobj);
+err_put:
+	if (pd)
+		put_pd_read(pd);
+	if (scq)
+		put_cq_read(scq);
+	if (rcq)
+		put_cq_read(rcq);
+	if (srq)
+		put_srq_read(srq);
+
+	put_uobj_write(&obj->uevent.uobject);
 	return ret;
 }
 
@@ -1018,15 +1180,15 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
 		goto out;
 	}
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (qp && qp->uobject->context == file->ucontext)
-		ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
-	else
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp) {
 		ret = -EINVAL;
+		goto out;
+	}
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
+
+	put_qp_read(qp);
 
 	if (ret)
 		goto out;
@@ -1113,10 +1275,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	if (!attr)
 		return -ENOMEM;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext) {
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1168,13 +1328,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
 	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+
+	put_qp_read(qp);
+
 	if (ret)
 		goto out;
 
 	ret = in_len;
 
 out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
 	kfree(attr);
 
 	return ret;
@@ -1186,8 +1348,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_destroy_qp      cmd;
 	struct ib_uverbs_destroy_qp_resp resp;
+	struct ib_uobject		*uobj;
 	struct ib_qp               	*qp;
-	struct ib_uqp_object        	*uobj;
+	struct ib_uqp_object        	*obj;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1195,43 +1358,43 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 
 	memset(&resp, 0, sizeof resp);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext)
-		goto out;
-
-	uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+	uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+	qp  = uobj->object;
+	obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
-	if (!list_empty(&uobj->mcast_list)) {
-		ret = -EBUSY;
-		goto out;
+	if (!list_empty(&obj->mcast_list)) {
+		put_uobj_write(uobj);
+		return -EBUSY;
 	}
 
 	ret = ib_destroy_qp(qp);
+	if (!ret)
+		uobj->live = 0;
+
+	put_uobj_write(uobj);
+
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
+	idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 
 	mutex_lock(&file->mutex);
-	list_del(&uobj->uevent.uobject.list);
+	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_uverbs_release_uevent(file, &uobj->uevent);
+	ib_uverbs_release_uevent(file, &obj->uevent);
 
-	resp.events_reported = uobj->uevent.events_reported;
+	resp.events_reported = obj->uevent.events_reported;
 
-	kfree(uobj);
+	put_uobj(uobj);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
-		ret = -EFAULT;
-
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+		return -EFAULT;
 
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
@@ -1244,6 +1407,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
 	struct ib_qp                   *qp;
 	int                             i, sg_ind;
+	int				is_ud;
 	ssize_t                         ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1260,12 +1424,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	if (!user_wr)
 		return -ENOMEM;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext)
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp)
 		goto out;
 
+	is_ud = qp->qp_type == IB_QPT_UD;
 	sg_ind = 0;
 	last = NULL;
 	for (i = 0; i < cmd.wr_count; ++i) {
@@ -1273,12 +1436,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 				   buf + sizeof cmd + i * cmd.wqe_size,
 				   cmd.wqe_size)) {
 			ret = -EFAULT;
-			goto out;
+			goto out_put;
 		}
 
 		if (user_wr->num_sge + sg_ind > cmd.sge_count) {
 			ret = -EINVAL;
-			goto out;
+			goto out_put;
 		}
 
 		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
@@ -1286,7 +1449,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			       GFP_KERNEL);
 		if (!next) {
 			ret = -ENOMEM;
-			goto out;
+			goto out_put;
 		}
 
 		if (!last)
@@ -1302,12 +1465,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 		next->send_flags = user_wr->send_flags;
 		next->imm_data   = (__be32 __force) user_wr->imm_data;
 
-		if (qp->qp_type == IB_QPT_UD) {
-			next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
-						  user_wr->wr.ud.ah);
+		if (is_ud) {
+			next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
+						     file->ucontext);
 			if (!next->wr.ud.ah) {
 				ret = -EINVAL;
-				goto out;
+				goto out_put;
 			}
 			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
 			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
@@ -1344,7 +1507,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 					   sg_ind * sizeof (struct ib_sge),
 					   next->num_sge * sizeof (struct ib_sge))) {
 				ret = -EFAULT;
-				goto out;
+				goto out_put;
 			}
 			sg_ind += next->num_sge;
 		} else
@@ -1364,10 +1527,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+out_put:
+	put_qp_read(qp);
 
+out:
 	while (wr) {
+		if (is_ud && wr->wr.ud.ah)
+			put_ah_read(wr->wr.ud.ah);
 		next = wr->next;
 		kfree(wr);
 		wr = next;
@@ -1482,14 +1648,15 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext)
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp)
 		goto out;
 
 	resp.bad_wr = 0;
 	ret = qp->device->post_recv(qp, wr, &bad_wr);
+
+	put_qp_read(qp);
+
 	if (ret)
 		for (next = wr; next; next = next->next) {
 			++resp.bad_wr;
@@ -1503,8 +1670,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 		ret = -EFAULT;
 
 out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
 	while (wr) {
 		next = wr->next;
 		kfree(wr);
@@ -1533,14 +1698,15 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
-	if (!srq || srq->uobject->context != file->ucontext)
+	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+	if (!srq)
 		goto out;
 
 	resp.bad_wr = 0;
 	ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+
+	put_srq_read(srq);
+
 	if (ret)
 		for (next = wr; next; next = next->next) {
 			++resp.bad_wr;
@@ -1554,8 +1720,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 		ret = -EFAULT;
 
 out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
 	while (wr) {
 		next = wr->next;
 		kfree(wr);
@@ -1587,17 +1751,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -ENOMEM;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	init_uobj(uobj, cmd.user_handle, file->ucontext);
+	down_write(&uobj->mutex);
 
-	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
-	if (!pd || pd->uobject->context != file->ucontext) {
+	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+	if (!pd) {
 		ret = -EINVAL;
-		goto err_up;
+		goto err;
 	}
 
-	uobj->user_handle = cmd.user_handle;
-	uobj->context     = file->ucontext;
-
 	attr.dlid 	       = cmd.attr.dlid;
 	attr.sl 	       = cmd.attr.sl;
 	attr.src_path_bits     = cmd.attr.src_path_bits;
@@ -1613,21 +1775,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	ah = ib_create_ah(pd, &attr);
 	if (IS_ERR(ah)) {
 		ret = PTR_ERR(ah);
-		goto err_up;
-	}
-
-	ah->uobject = uobj;
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_destroy;
+		goto err;
 	}
 
-	ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+	ah->uobject  = uobj;
+	uobj->object = ah;
 
-	if (ret == -EAGAIN)
-		goto retry;
+	ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
 	if (ret)
 		goto err_destroy;
 
@@ -1636,27 +1790,29 @@ retry:
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
+	put_pd_read(pd);
+
 	mutex_lock(&file->mutex);
 	list_add_tail(&uobj->list, &file->ucontext->ah_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	uobj->live = 1;
+
+	up_write(&uobj->mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_ah_idr, uobj->id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 
 err_destroy:
 	ib_destroy_ah(ah);
 
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
-	kfree(uobj);
+err:
+	put_uobj_write(uobj);
 	return ret;
 }
 
@@ -1666,35 +1822,34 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_ah cmd;
 	struct ib_ah		   *ah;
 	struct ib_uobject	   *uobj;
-	int			    ret = -EINVAL;
+	int			    ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+	ah = uobj->object;
 
-	ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
-	if (!ah || ah->uobject->context != file->ucontext)
-		goto out;
+	ret = ib_destroy_ah(ah);
+	if (!ret)
+		uobj->live = 0;
 
-	uobj = ah->uobject;
+	put_uobj_write(uobj);
 
-	ret = ib_destroy_ah(ah);
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+	idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 
 	mutex_lock(&file->mutex);
 	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	kfree(uobj);
+	put_uobj(uobj);
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -1703,47 +1858,43 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_attach_mcast cmd;
 	struct ib_qp                 *qp;
-	struct ib_uqp_object         *uobj;
+	struct ib_uqp_object         *obj;
 	struct ib_uverbs_mcast_entry *mcast;
-	int                           ret = -EINVAL;
+	int                           ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext)
-		goto out;
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp)
+		return -EINVAL;
 
-	uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+	obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
 
-	list_for_each_entry(mcast, &uobj->mcast_list, list)
+	list_for_each_entry(mcast, &obj->mcast_list, list)
 		if (cmd.mlid == mcast->lid &&
 		    !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
 			ret = 0;
-			goto out;
+			goto out_put;
 		}
 
 	mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
 	if (!mcast) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_put;
 	}
 
 	mcast->lid = cmd.mlid;
 	memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
 
 	ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
-	if (!ret) {
-		uobj = container_of(qp->uobject, struct ib_uqp_object,
-				    uevent.uobject);
-		list_add_tail(&mcast->list, &uobj->mcast_list);
-	} else
+	if (!ret)
+		list_add_tail(&mcast->list, &obj->mcast_list);
+	else
 		kfree(mcast);
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+out_put:
+	put_qp_read(qp);
 
 	return ret ? ret : in_len;
 }
@@ -1753,7 +1904,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 			       int out_len)
 {
 	struct ib_uverbs_detach_mcast cmd;
-	struct ib_uqp_object         *uobj;
+	struct ib_uqp_object         *obj;
 	struct ib_qp                 *qp;
 	struct ib_uverbs_mcast_entry *mcast;
 	int                           ret = -EINVAL;
@@ -1761,19 +1912,17 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
-	if (!qp || qp->uobject->context != file->ucontext)
-		goto out;
+	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	if (!qp)
+		return -EINVAL;
 
 	ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
 	if (ret)
-		goto out;
+		goto out_put;
 
-	uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+	obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
 
-	list_for_each_entry(mcast, &uobj->mcast_list, list)
+	list_for_each_entry(mcast, &obj->mcast_list, list)
 		if (cmd.mlid == mcast->lid &&
 		    !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
 			list_del(&mcast->list);
@@ -1781,8 +1930,8 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 			break;
 		}
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+out_put:
+	put_qp_read(qp);
 
 	return ret ? ret : in_len;
 }
@@ -1794,7 +1943,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_srq      cmd;
 	struct ib_uverbs_create_srq_resp resp;
 	struct ib_udata                  udata;
-	struct ib_uevent_object         *uobj;
+	struct ib_uevent_object         *obj;
 	struct ib_pd                    *pd;
 	struct ib_srq                   *srq;
 	struct ib_srq_init_attr          attr;
@@ -1810,17 +1959,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-	if (!uobj)
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
 		return -ENOMEM;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	init_uobj(&obj->uobject, 0, file->ucontext);
+	down_write(&obj->uobject.mutex);
 
-	pd  = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
-
-	if (!pd || pd->uobject->context != file->ucontext) {
+	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+	if (!pd) {
 		ret = -EINVAL;
-		goto err_up;
+		goto err;
 	}
 
 	attr.event_handler  = ib_uverbs_srq_event_handler;
@@ -1829,69 +1978,59 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	attr.attr.max_sge   = cmd.max_sge;
 	attr.attr.srq_limit = cmd.srq_limit;
 
-	uobj->uobject.user_handle = cmd.user_handle;
-	uobj->uobject.context     = file->ucontext;
-	uobj->events_reported     = 0;
-	INIT_LIST_HEAD(&uobj->event_list);
+	obj->events_reported     = 0;
+	INIT_LIST_HEAD(&obj->event_list);
 
 	srq = pd->device->create_srq(pd, &attr, &udata);
 	if (IS_ERR(srq)) {
 		ret = PTR_ERR(srq);
-		goto err_up;
+		goto err;
 	}
 
 	srq->device    	   = pd->device;
 	srq->pd        	   = pd;
-	srq->uobject       = &uobj->uobject;
+	srq->uobject       = &obj->uobject;
 	srq->event_handler = attr.event_handler;
 	srq->srq_context   = attr.srq_context;
 	atomic_inc(&pd->usecnt);
 	atomic_set(&srq->usecnt, 0);
 
-	memset(&resp, 0, sizeof resp);
-
-retry:
-	if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_destroy;
-	}
-
-	ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id);
-
-	if (ret == -EAGAIN)
-		goto retry;
+	obj->uobject.object = srq;
+	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
 	if (ret)
 		goto err_destroy;
 
-	resp.srq_handle = uobj->uobject.id;
+	memset(&resp, 0, sizeof resp);
+	resp.srq_handle = obj->uobject.id;
 	resp.max_wr     = attr.attr.max_wr;
 	resp.max_sge    = attr.attr.max_sge;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_idr;
+		goto err_copy;
 	}
 
+	put_pd_read(pd);
+
 	mutex_lock(&file->mutex);
-	list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
+	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
 	mutex_unlock(&file->mutex);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	obj->uobject.live = 1;
+
+	up_write(&obj->uobject.mutex);
 
 	return in_len;
 
-err_idr:
-	idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
+err_copy:
+	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
 
 err_destroy:
 	ib_destroy_srq(srq);
-	atomic_dec(&pd->usecnt);
-
-err_up:
-	mutex_unlock(&ib_uverbs_idr_mutex);
 
-	kfree(uobj);
+err:
+	put_uobj_write(&obj->uobject);
 	return ret;
 }
 
@@ -1907,21 +2046,16 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
-	if (!srq || srq->uobject->context != file->ucontext) {
-		ret = -EINVAL;
-		goto out;
-	}
+	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+	if (!srq)
+		return -EINVAL;
 
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;
 
 	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	put_srq_read(srq);
 
 	return ret ? ret : in_len;
 }
@@ -1942,18 +2076,16 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
+	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+	if (!srq)
+		return -EINVAL;
 
-	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
-	if (srq && srq->uobject->context == file->ucontext)
-		ret = ib_query_srq(srq, &attr);
-	else
-		ret = -EINVAL;
+	ret = ib_query_srq(srq, &attr);
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
+	put_srq_read(srq);
 
 	if (ret)
-		goto out;
+		return ret;
 
 	memset(&resp, 0, sizeof resp);
 
@@ -1963,10 +2095,9 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
-		ret = -EFAULT;
+		return -EFAULT;
 
-out:
-	return ret ? ret : in_len;
+	return in_len;
 }
 
 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
@@ -1975,45 +2106,45 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_destroy_srq      cmd;
 	struct ib_uverbs_destroy_srq_resp resp;
+	struct ib_uobject		 *uobj;
 	struct ib_srq               	 *srq;
-	struct ib_uevent_object        	 *uobj;
+	struct ib_uevent_object        	 *obj;
 	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
-	memset(&resp, 0, sizeof resp);
+	uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+	srq = uobj->object;
+	obj = container_of(uobj, struct ib_uevent_object, uobject);
 
-	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
-	if (!srq || srq->uobject->context != file->ucontext)
-		goto out;
+	ret = ib_destroy_srq(srq);
+	if (!ret)
+		uobj->live = 0;
 
-	uobj = container_of(srq->uobject, struct ib_uevent_object, uobject);
+	put_uobj_write(uobj);
 
-	ret = ib_destroy_srq(srq);
 	if (ret)
-		goto out;
+		return ret;
 
-	idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle);
+	idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 
 	mutex_lock(&file->mutex);
-	list_del(&uobj->uobject.list);
+	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_uverbs_release_uevent(file, uobj);
+	ib_uverbs_release_uevent(file, obj);
 
-	resp.events_reported = uobj->events_reported;
+	memset(&resp, 0, sizeof resp);
+	resp.events_reported = obj->events_reported;
 
-	kfree(uobj);
+	put_uobj(uobj);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
 
-out:
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
 	return ret ? ret : in_len;
 }
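
The conversion above replaces the global ib_uverbs_idr_mutex with per-object locking: lookup helpers take a read or write lock on the uobject itself, and destroy paths clear the object's live flag while holding the write lock. The pair below is only a rough sketch of what the read-side helpers used throughout this file might look like; the reference-count field (ref), its release function (free_uobj) and the exact ordering are illustrative assumptions, not the actual implementation.

	static void put_uobj_read(struct ib_uobject *uobj)
	{
		up_read(&uobj->mutex);
		kref_put(&uobj->ref, free_uobj);	/* assumed refcounting */
	}

	static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
						struct ib_ucontext *context)
	{
		struct ib_uobject *uobj;

		spin_lock(&ib_uverbs_idr_lock);
		uobj = idr_find(idr, id);
		if (uobj)
			kref_get(&uobj->ref);	/* keep it alive across the lookup */
		spin_unlock(&ib_uverbs_idr_lock);
		if (!uobj)
			return NULL;

		down_read(&uobj->mutex);
		if (!uobj->live || uobj->context != context) {
			/* destroyed concurrently or owned by another context */
			put_uobj_read(uobj);
			return NULL;
		}
		return uobj;
	}
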
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ff092a0a94da0ebf77967dbeef73da9d2943ec8c..5ec2d49e9bb6ebae15bd88da1206e94d7f6eb860 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -66,7 +66,7 @@ enum {
 
 static struct class *uverbs_class;
 
-DEFINE_MUTEX(ib_uverbs_idr_mutex);
+DEFINE_SPINLOCK(ib_uverbs_idr_lock);
 DEFINE_IDR(ib_uverbs_pd_idr);
 DEFINE_IDR(ib_uverbs_mr_idr);
 DEFINE_IDR(ib_uverbs_mw_idr);
@@ -183,21 +183,21 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	if (!context)
 		return 0;
 
-	mutex_lock(&ib_uverbs_idr_mutex);
-
 	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
-		struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
-		idr_remove(&ib_uverbs_ah_idr, uobj->id);
+		struct ib_ah *ah = uobj->object;
+
+		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 		ib_destroy_ah(ah);
 		list_del(&uobj->list);
 		kfree(uobj);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
-		struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+		struct ib_qp *qp = uobj->object;
 		struct ib_uqp_object *uqp =
 			container_of(uobj, struct ib_uqp_object, uevent.uobject);
-		idr_remove(&ib_uverbs_qp_idr, uobj->id);
+
+		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 		ib_uverbs_detach_umcast(qp, uqp);
 		ib_destroy_qp(qp);
 		list_del(&uobj->list);
@@ -206,11 +206,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
-		struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+		struct ib_cq *cq = uobj->object;
 		struct ib_uverbs_event_file *ev_file = cq->cq_context;
 		struct ib_ucq_object *ucq =
 			container_of(uobj, struct ib_ucq_object, uobject);
-		idr_remove(&ib_uverbs_cq_idr, uobj->id);
+
+		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 		ib_destroy_cq(cq);
 		list_del(&uobj->list);
 		ib_uverbs_release_ucq(file, ev_file, ucq);
@@ -218,10 +219,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-		struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
+		struct ib_srq *srq = uobj->object;
 		struct ib_uevent_object *uevent =
 			container_of(uobj, struct ib_uevent_object, uobject);
-		idr_remove(&ib_uverbs_srq_idr, uobj->id);
+
+		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 		ib_destroy_srq(srq);
 		list_del(&uobj->list);
 		ib_uverbs_release_uevent(file, uevent);
@@ -231,11 +233,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	/* XXX Free MWs */
 
 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
-		struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
+		struct ib_mr *mr = uobj->object;
 		struct ib_device *mrdev = mr->device;
 		struct ib_umem_object *memobj;
 
-		idr_remove(&ib_uverbs_mr_idr, uobj->id);
+		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 		ib_dereg_mr(mr);
 
 		memobj = container_of(uobj, struct ib_umem_object, uobject);
@@ -246,15 +248,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
-		struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
-		idr_remove(&ib_uverbs_pd_idr, uobj->id);
+		struct ib_pd *pd = uobj->object;
+
+		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 		ib_dealloc_pd(pd);
 		list_del(&uobj->list);
 		kfree(uobj);
 	}
 
-	mutex_unlock(&ib_uverbs_idr_mutex);
-
 	return context->device->dealloc_ucontext(context);
 }
 
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce46b13ae02be96157b983f1b713bfd7a77db2ac
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_marshall.h>
+
+static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
+				    struct ib_ah_attr *src)
+{
+	memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
+	dst->grh.flow_label        = src->grh.flow_label;
+	dst->grh.sgid_index        = src->grh.sgid_index;
+	dst->grh.hop_limit         = src->grh.hop_limit;
+	dst->grh.traffic_class     = src->grh.traffic_class;
+	dst->dlid 	    	   = src->dlid;
+	dst->sl   	    	   = src->sl;
+	dst->src_path_bits 	   = src->src_path_bits;
+	dst->static_rate   	   = src->static_rate;
+	dst->is_global             = src->ah_flags & IB_AH_GRH ? 1 : 0;
+	dst->port_num 	    	   = src->port_num;
+}
+
+void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+			     struct ib_qp_attr *src)
+{
+	dst->cur_qp_state	= src->cur_qp_state;
+	dst->path_mtu		= src->path_mtu;
+	dst->path_mig_state	= src->path_mig_state;
+	dst->qkey		= src->qkey;
+	dst->rq_psn		= src->rq_psn;
+	dst->sq_psn		= src->sq_psn;
+	dst->dest_qp_num	= src->dest_qp_num;
+	dst->qp_access_flags	= src->qp_access_flags;
+
+	dst->max_send_wr	= src->cap.max_send_wr;
+	dst->max_recv_wr	= src->cap.max_recv_wr;
+	dst->max_send_sge	= src->cap.max_send_sge;
+	dst->max_recv_sge	= src->cap.max_recv_sge;
+	dst->max_inline_data	= src->cap.max_inline_data;
+
+	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+	ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
+
+	dst->pkey_index		= src->pkey_index;
+	dst->alt_pkey_index	= src->alt_pkey_index;
+	dst->en_sqd_async_notify = src->en_sqd_async_notify;
+	dst->sq_draining	= src->sq_draining;
+	dst->max_rd_atomic	= src->max_rd_atomic;
+	dst->max_dest_rd_atomic	= src->max_dest_rd_atomic;
+	dst->min_rnr_timer	= src->min_rnr_timer;
+	dst->port_num		= src->port_num;
+	dst->timeout		= src->timeout;
+	dst->retry_cnt		= src->retry_cnt;
+	dst->rnr_retry		= src->rnr_retry;
+	dst->alt_port_num	= src->alt_port_num;
+	dst->alt_timeout	= src->alt_timeout;
+}
+EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+			      struct ib_sa_path_rec *src)
+{
+	memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
+	memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+
+	dst->dlid		= src->dlid;
+	dst->slid		= src->slid;
+	dst->raw_traffic	= src->raw_traffic;
+	dst->flow_label		= src->flow_label;
+	dst->hop_limit		= src->hop_limit;
+	dst->traffic_class	= src->traffic_class;
+	dst->reversible		= src->reversible;
+	dst->numb_path		= src->numb_path;
+	dst->pkey		= src->pkey;
+	dst->sl			= src->sl;
+	dst->mtu_selector	= src->mtu_selector;
+	dst->mtu		= src->mtu;
+	dst->rate_selector	= src->rate_selector;
+	dst->rate		= src->rate;
+	dst->packet_life_time	= src->packet_life_time;
+	dst->preference		= src->preference;
+	dst->packet_life_time_selector = src->packet_life_time_selector;
+}
+EXPORT_SYMBOL(ib_copy_path_rec_to_user);
+
+void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+				struct ib_user_path_rec *src)
+{
+	memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
+	memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
+
+	dst->dlid		= src->dlid;
+	dst->slid		= src->slid;
+	dst->raw_traffic	= src->raw_traffic;
+	dst->flow_label		= src->flow_label;
+	dst->hop_limit		= src->hop_limit;
+	dst->traffic_class	= src->traffic_class;
+	dst->reversible		= src->reversible;
+	dst->numb_path		= src->numb_path;
+	dst->pkey		= src->pkey;
+	dst->sl			= src->sl;
+	dst->mtu_selector	= src->mtu_selector;
+	dst->mtu		= src->mtu;
+	dst->rate_selector	= src->rate_selector;
+	dst->rate		= src->rate;
+	dst->packet_life_time	= src->packet_life_time;
+	dst->preference		= src->preference;
+	dst->packet_life_time_selector = src->packet_life_time_selector;
+}
+EXPORT_SYMBOL(ib_copy_path_rec_from_user);
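
These copy helpers give kernel consumers a stable way to move path records and QP attributes between the kernel structures and the userspace-visible layouts. A minimal illustration of the path-record pair (rec and urec are hypothetical locals):

	struct ib_sa_path_rec   rec;
	struct ib_user_path_rec urec;

	/* kernel -> user layout, e.g. right before a copy_to_user() */
	ib_copy_path_rec_to_user(&urec, &rec);

	/* user -> kernel layout, e.g. right after a copy_from_user() */
	ib_copy_path_rec_from_user(&rec, &urec);
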
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b78e7dc6933072c21f4bd943960a9b79f6e3393f..468999c388033e83f4668f4f43e187f911f7a3ec 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -125,35 +125,47 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
-struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
-				   struct ib_grh *grh, u8 port_num)
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
+		       struct ib_grh *grh, struct ib_ah_attr *ah_attr)
 {
-	struct ib_ah_attr ah_attr;
 	u32 flow_class;
 	u16 gid_index;
 	int ret;
 
-	memset(&ah_attr, 0, sizeof ah_attr);
-	ah_attr.dlid = wc->slid;
-	ah_attr.sl = wc->sl;
-	ah_attr.src_path_bits = wc->dlid_path_bits;
-	ah_attr.port_num = port_num;
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = wc->slid;
+	ah_attr->sl = wc->sl;
+	ah_attr->src_path_bits = wc->dlid_path_bits;
+	ah_attr->port_num = port_num;
 
 	if (wc->wc_flags & IB_WC_GRH) {
-		ah_attr.ah_flags = IB_AH_GRH;
-		ah_attr.grh.dgid = grh->sgid;
+		ah_attr->ah_flags = IB_AH_GRH;
+		ah_attr->grh.dgid = grh->sgid;
 
-		ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num,
+		ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
 					 &gid_index);
 		if (ret)
-			return ERR_PTR(ret);
+			return ret;
 
-		ah_attr.grh.sgid_index = (u8) gid_index;
+		ah_attr->grh.sgid_index = (u8) gid_index;
 		flow_class = be32_to_cpu(grh->version_tclass_flow);
-		ah_attr.grh.flow_label = flow_class & 0xFFFFF;
-		ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF;
-		ah_attr.grh.hop_limit = grh->hop_limit;
+		ah_attr->grh.flow_label = flow_class & 0xFFFFF;
+		ah_attr->grh.hop_limit = grh->hop_limit;
+		ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
 	}
+	return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_wc);
+
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
+				   struct ib_grh *grh, u8 port_num)
+{
+	struct ib_ah_attr ah_attr;
+	int ret;
+
+	ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+	if (ret)
+		return ERR_PTR(ret);
 
 	return ib_create_ah(pd, &ah_attr);
 }
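
Factoring ib_init_ah_from_wc() out of ib_create_ah_from_wc() lets a caller derive the reverse-path address attributes from a completion without allocating an AH right away. A sketch of such a caller (device, port_num, wc and grh assumed to be in scope):

	struct ib_ah_attr ah_attr;
	int ret;

	ret = ib_init_ah_from_wc(device, port_num, wc, grh, &ah_attr);
	if (ret)
		return ret;

	/* ah_attr now holds the DLID, SL and (if present) GRH fields for
	 * replying to the sender; it can be cached or marshalled, and an
	 * AH created later with ib_create_ah(pd, &ah_attr). */
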
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index f7f8391fe43fc21d714144991c76dab31c2f20d1..1a9d0a2c33c37f6aa4d158f1505dc501548486ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -137,47 +137,11 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
 	return reply(smp);
 }
 
-struct port_info {
-	__be64 mkey;
-	__be64 gid_prefix;
-	__be16 lid;
-	__be16 sm_lid;
-	__be32 cap_mask;
-	__be16 diag_code;
-	__be16 mkey_lease_period;
-	u8 local_port_num;
-	u8 link_width_enabled;
-	u8 link_width_supported;
-	u8 link_width_active;
-	u8 linkspeed_portstate;			/* 4 bits, 4 bits */
-	u8 portphysstate_linkdown;		/* 4 bits, 4 bits */
-	u8 mkeyprot_resv_lmc;			/* 2 bits, 3, 3 */
-	u8 linkspeedactive_enabled;		/* 4 bits, 4 bits */
-	u8 neighbormtu_mastersmsl;		/* 4 bits, 4 bits */
-	u8 vlcap_inittype;			/* 4 bits, 4 bits */
-	u8 vl_high_limit;
-	u8 vl_arb_high_cap;
-	u8 vl_arb_low_cap;
-	u8 inittypereply_mtucap;		/* 4 bits, 4 bits */
-	u8 vlstallcnt_hoqlife;			/* 3 bits, 5 bits */
-	u8 operationalvl_pei_peo_fpi_fpo;	/* 4 bits, 1, 1, 1, 1 */
-	__be16 mkey_violations;
-	__be16 pkey_violations;
-	__be16 qkey_violations;
-	u8 guid_cap;
-	u8 clientrereg_resv_subnetto;		/* 1 bit, 2 bits, 5 */
-	u8 resv_resptimevalue;			/* 3 bits, 5 bits */
-	u8 localphyerrors_overrunerrors;	/* 4 bits, 4 bits */
-	__be16 max_credit_hint;
-	u8 resv;
-	u8 link_roundtrip_latency[3];
-} __attribute__ ((packed));
-
 static int recv_subn_get_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
 	struct ipath_ibdev *dev;
-	struct port_info *pip = (struct port_info *)smp->data;
+	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
 	u16 lid;
 	u8 ibcstat;
 	u8 mtu;
@@ -312,7 +276,7 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
 static int recv_subn_set_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
-	struct port_info *pip = (struct port_info *)smp->data;
+	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
 	struct ib_event event;
 	struct ipath_ibdev *dev;
 	u32 flags;
@@ -445,7 +409,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 
 	if (pip->clientrereg_resv_subnetto & 0x80) {
 		clientrereg = 1;
-		event.event = IB_EVENT_LID_CHANGE;
+		event.event = IB_EVENT_CLIENT_REREGISTER;
 		ib_dispatch_event(&event);
 	}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 798e13e14faf826695d8b64a0493a5580e24bd73..d0f7731802c9acede3b220e730ab31ba9873cc24 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -174,7 +174,6 @@ enum {
 
 struct mthca_cmd_context {
 	struct completion done;
-	struct timer_list timer;
 	int               result;
 	int               next;
 	u64               out_param;
@@ -362,15 +361,6 @@ void mthca_cmd_event(struct mthca_dev *dev,
 	complete(&context->done);
 }
 
-static void event_timeout(unsigned long context_ptr)
-{
-	struct mthca_cmd_context *context =
-		(struct mthca_cmd_context *) context_ptr;
-
-	context->result = -EBUSY;
-	complete(&context->done);
-}
-
 static int mthca_cmd_wait(struct mthca_dev *dev,
 			  u64 in_param,
 			  u64 *out_param,
@@ -401,11 +391,10 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
 	if (err)
 		goto out;
 
-	context->timer.expires  = jiffies + timeout;
-	add_timer(&context->timer);
-
-	wait_for_completion(&context->done);
-	del_timer_sync(&context->timer);
+	if (!wait_for_completion_timeout(&context->done, timeout)) {
+		err = -EBUSY;
+		goto out;
+	}
 
 	err = context->result;
 	if (err)
@@ -535,10 +524,6 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
 	for (i = 0; i < dev->cmd.max_cmds; ++i) {
 		dev->cmd.context[i].token = i;
 		dev->cmd.context[i].next = i + 1;
-		init_timer(&dev->cmd.context[i].timer);
-		dev->cmd.context[i].timer.data     =
-			(unsigned long) &dev->cmd.context[i];
-		dev->cmd.context[i].timer.function = event_timeout;
 	}
 
 	dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
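
The command interface now leans on wait_for_completion_timeout() rather than arming a private timer per context. The call returns 0 when the timeout (expressed in jiffies) expires before the completion fires, and a nonzero count of remaining jiffies otherwise, which is why a zero return is turned into -EBUSY above. A brief sketch of that convention (the 60-second value is only an assumed example of what a caller might pass):

	unsigned long timeout = msecs_to_jiffies(60 * 1000);

	if (!wait_for_completion_timeout(&context->done, timeout)) {
		/* no command-completion event arrived within 'timeout' */
		err = -EBUSY;
		goto out;
	}
	/* the event handler ran: context->result holds the command status */
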
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 205854e9c662809b49576b1ea8c3eb213e44ce72..3e27a084257e9f23b7a4fba49561e15cfddf5b91 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -540,8 +540,17 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		entry->wr_id = srq->wrid[wqe_index];
 		mthca_free_srq_wqe(srq, wqe);
 	} else {
+		s32 wqe;
 		wq = &(*cur_qp)->rq;
-		wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift;
+		wqe = be32_to_cpu(cqe->wqe);
+		wqe_index = wqe >> wq->wqe_shift;
+		/*
+		 * WQE addr == base - 1 might be reported in receive completion
+		 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+		 * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+		 */
+		if (unlikely(wqe_index < 0))
+			wqe_index = wq->max - 1;
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
 	}
 
@@ -813,6 +822,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 	spin_lock_init(&cq->lock);
 	cq->refcount = 1;
 	init_waitqueue_head(&cq->wait);
+	mutex_init(&cq->mutex);
 
 	memset(cq_context, 0, sizeof *cq_context);
 	cq_context->flags           = cpu_to_be32(MTHCA_CQ_STATUS_OK      |
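
The wqe_index clamp in mthca_poll_one() above works because the reported WQE address is read into a signed 32-bit value: a bogus "base - 1" address sign-extends to a negative index once shifted. A worked example of the arithmetic, assuming the receive queue starts at offset 0 of the WQE buffer and wqe_shift is 6 (both assumptions for illustration):

	s32 wqe       = (s32) 0xffffffff;	/* be32_to_cpu(cqe->wqe): base - 1 */
	int wqe_index = wqe >> 6;		/* arithmetic shift: -1, i.e. < 0 */
	/* the driver then substitutes wq->max - 1, the real last RQ entry */
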
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 99f109c3815d86255d0e6c607ebb742aec1c287e..d536217e700ea46f8acec32f2830fbd965b25af2 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -695,10 +695,6 @@ static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
 
 static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
 {
-	unsigned long mthca_base;
-
-	mthca_base = pci_resource_start(dev->pdev, 0);
-
 	if (mthca_is_memfree(dev)) {
 		/*
 		 * We assume that the EQ arm and EQ set CI registers
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 4730863ece9a398c89e62c366d2f112d81b509f4..d9bc030bcccc78627f434a226f199110dafefa15 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -114,14 +114,22 @@ static void smp_snoop(struct ib_device *ibdev,
 	     mad->mad_hdr.mgmt_class  == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
 	    mad->mad_hdr.method     == IB_MGMT_METHOD_SET) {
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+			struct ib_port_info *pinfo =
+				(struct ib_port_info *) ((struct ib_smp *) mad)->data;
+
 			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
-				     be16_to_cpup((__be16 *) (mad->data + 58)),
-				     (*(u8 *) (mad->data + 76)) & 0xf);
+				     be16_to_cpu(pinfo->lid),
+				     pinfo->neighbormtu_mastersmsl & 0xf);
 
 			event.device           = ibdev;
-			event.event            = IB_EVENT_LID_CHANGE;
 			event.element.port_num = port_num;
+
+			if (pinfo->clientrereg_resv_subnetto & 0x80)
+				event.event    = IB_EVENT_CLIENT_REREGISTER;
+			else
+				event.event    = IB_EVENT_LID_CHANGE;
+
 			ib_dispatch_event(&event);
 		}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index a2eae8a30167e3d9cc076b6885028c54485a8285..230ae21db8fd3c15a2c8664443987efec352912e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -115,6 +115,16 @@ static int mthca_query_device(struct ib_device *ibdev,
 	props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 					   props->max_mcast_grp;
+	/*
+	 * If Sinai memory key optimization is being used, then only
+	 * the 8-bit key portion will change.  For other HCAs, the
+	 * unused index bits will also be used for FMR remapping.
+	 */
+	if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		props->max_map_per_fmr = 255;
+	else
+		props->max_map_per_fmr =
+			(1 << (32 - long_log2(mdev->limits.num_mpts))) - 1;
 
 	err = 0;
  out:
@@ -783,18 +793,24 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
 	if (entries < 1 || entries > dev->limits.max_cqes)
 		return -EINVAL;
 
+	mutex_lock(&cq->mutex);
+
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries == ibcq->cqe + 1)
-		return 0;
+	if (entries == ibcq->cqe + 1) {
+		ret = 0;
+		goto out;
+	}
 
 	if (cq->is_kernel) {
 		ret = mthca_alloc_resize_buf(dev, cq, entries);
 		if (ret)
-			return ret;
+			goto out;
 		lkey = cq->resize_buf->buf.mr.ibmr.lkey;
 	} else {
-		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
-			return -EFAULT;
+		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+			ret = -EFAULT;
+			goto out;
+		}
 		lkey = ucmd.lkey;
 	}
 
@@ -811,7 +827,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
 			cq->resize_buf = NULL;
 			spin_unlock_irq(&cq->lock);
 		}
-		return ret;
+		goto out;
 	}
 
 	if (cq->is_kernel) {
@@ -838,7 +854,10 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
 	} else
 		ibcq->cqe = entries - 1;
 
-	return 0;
+out:
+	mutex_unlock(&cq->mutex);
+
+	return ret;
 }
 
 static int mthca_destroy_cq(struct ib_cq *cq)
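
The max_map_per_fmr value added to mthca_query_device() reflects how many times an FMR can be remapped before its key wraps: with the Sinai key optimization only the low 8 key bits cycle (255 maps), otherwise the index bits left over after addressing num_mpts memory regions are cycled. As a worked example, if mdev->limits.num_mpts were 2^17 the non-Sinai branch would compute:

	/* long_log2(1 << 17) == 17, so: */
	props->max_map_per_fmr = (1 << (32 - 17)) - 1;	/* 32767 remaps */
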
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 179a8f610d0f00ded795bfa0b07310a9ac1fc986..8de2887ba15ce090ef9ea4297a634c5067244aaf 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -214,6 +214,7 @@ struct mthca_cq {
 	int			arm_sn;
 
 	wait_queue_head_t	wait;
+	struct mutex		mutex;
 };
 
 struct mthca_srq {
@@ -237,6 +238,7 @@ struct mthca_srq {
 	struct mthca_mr		mr;
 
 	wait_queue_head_t	wait;
+	struct mutex		mutex;
 };
 
 struct mthca_wq {
@@ -278,6 +280,7 @@ struct mthca_qp {
 	union mthca_buf	       queue;
 
 	wait_queue_head_t      wait;
+	struct mutex	       mutex;
 };
 
 struct mthca_sqp {
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 07c13be07a4a5da858a975fe6d11059b0da3a35b..16c387d8170cc599b8b2b16aa8d0275672bbe920 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -534,7 +534,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	struct mthca_qp_context *qp_context;
 	u32 sqd_event = 0;
 	u8 status;
-	int err;
+	int err = -EINVAL;
+
+	mutex_lock(&qp->mutex);
 
 	if (attr_mask & IB_QP_CUR_STATE) {
 		cur_state = attr->cur_qp_state;
@@ -553,39 +555,41 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 			  "%d->%d with attr 0x%08x\n",
 			  qp->transport, cur_state, new_state,
 			  attr_mask);
-		return -EINVAL;
+		goto out;
 	}
 
 	if ((attr_mask & IB_QP_PKEY_INDEX) &&
 	     attr->pkey_index >= dev->limits.pkey_table_len) {
 		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
 			  attr->pkey_index, dev->limits.pkey_table_len-1);
-		return -EINVAL;
+		goto out;
 	}
 
 	if ((attr_mask & IB_QP_PORT) &&
 	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
 		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
-		return -EINVAL;
+		goto out;
 	}
 
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
 	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
 		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
 			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
-		return -EINVAL;
+		goto out;
 	}
 
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
 	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
 		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
 			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
-		return -EINVAL;
+		goto out;
 	}
 
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
+		goto out;
+	}
 	qp_param = mailbox->buf;
 	qp_context = &qp_param->context;
 	memset(qp_param, 0, sizeof *qp_param);
@@ -618,7 +622,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
 			mthca_dbg(dev, "path MTU (%u) is invalid\n",
 				  attr->path_mtu);
-			return -EINVAL;
+			goto out_mailbox;
 		}
 		qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
 	}
@@ -672,7 +676,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (attr_mask & IB_QP_AV) {
 		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
 				   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
-			return -EINVAL;
+			goto out_mailbox;
 
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
 	}
@@ -686,18 +690,18 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
 			mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
 				  attr->alt_pkey_index, dev->limits.pkey_table_len-1);
-			return -EINVAL;
+			goto out_mailbox;
 		}
 
 		if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
 			mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
 				attr->alt_port_num);
-			return -EINVAL;
+			goto out_mailbox;
 		}
 
 		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
 				   attr->alt_ah_attr.port_num))
-			return -EINVAL;
+			goto out_mailbox;
 
 		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
 							      attr->alt_port_num << 24);
@@ -793,12 +797,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
 			      mailbox, sqd_event, &status);
 	if (err)
-		goto out;
+		goto out_mailbox;
 	if (status) {
 		mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
 			   cur_state, new_state, status);
 		err = -EINVAL;
-		goto out;
+		goto out_mailbox;
 	}
 
 	qp->state = new_state;
@@ -853,8 +857,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		}
 	}
 
-out:
+out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
+
+out:
+	mutex_unlock(&qp->mutex);
 	return err;
 }
 
@@ -1100,6 +1107,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 
 	qp->refcount = 1;
 	init_waitqueue_head(&qp->wait);
+	mutex_init(&qp->mutex);
 	qp->state    	 = IB_QPS_RESET;
 	qp->atomic_rd_en = 0;
 	qp->resp_depth   = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index df5e494a9d3887ca9b71c9cd8721c7ffdaea0d54..f4fddd5327f534039bd0de8b6e0c5cce91ea4c3d 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -49,6 +49,12 @@ int mthca_reset(struct mthca_dev *mdev)
 	u32 *hca_header    = NULL;
 	u32 *bridge_header = NULL;
 	struct pci_dev *bridge = NULL;
+	int bridge_pcix_cap = 0;
+	int hca_pcie_cap = 0;
+	int hca_pcix_cap = 0;
+
+	u16 devctl;
+	u16 linkctl;
 
 #define MTHCA_RESET_OFFSET 0xf0010
 #define MTHCA_RESET_VALUE  swab32(1)
@@ -110,6 +116,9 @@ int mthca_reset(struct mthca_dev *mdev)
 		}
 	}
 
+	hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+	hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+
 	if (bridge) {
 		bridge_header = kmalloc(256, GFP_KERNEL);
 		if (!bridge_header) {
@@ -129,6 +138,13 @@ int mthca_reset(struct mthca_dev *mdev)
 				goto out;
 			}
 		}
+		bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+		if (!bridge_pcix_cap) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't locate HCA bridge "
+				  "PCI-X capability, aborting.\n");
+			goto out;
+		}
 	}
 
 	/* actually hit reset */
@@ -178,6 +194,20 @@ int mthca_reset(struct mthca_dev *mdev)
 good:
 	/* Now restore the PCI headers */
 	if (bridge) {
+		if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
+				 bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
+				  "split transaction control, aborting.\n");
+			goto out;
+		}
+		if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
+				 bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
+				  "split transaction control, aborting.\n");
+			goto out;
+		}
 		/*
 		 * Bridge control register is at 0x3e, so we'll
 		 * naturally restore it last in this loop.
@@ -203,6 +233,35 @@ good:
 		}
 	}
 
+	if (hca_pcix_cap) {
+		if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
+				 hca_header[hca_pcix_cap / 4])) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't restore HCA PCI-X "
+				  "command register, aborting.\n");
+			goto out;
+		}
+	}
+
+	if (hca_pcie_cap) {
+		devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
+		if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
+					   devctl)) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't restore HCA PCI Express "
+				  "Device Control register, aborting.\n");
+			goto out;
+		}
+		linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
+		if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
+					   linkctl)) {
+			err = -ENODEV;
+			mthca_err(mdev, "Couldn't restore HCA PCI Express "
+				  "Link control register, aborting.\n");
+			goto out;
+		}
+	}
+
 	for (i = 0; i < 16; ++i) {
 		if (i * 4 == PCI_COMMAND)
 			continue;
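
The new capability handling above indexes hca_header[] and bridge_header[] by byte-offset / 4 because those buffers hold the first 256 bytes of config space saved as 64 dwords before the reset is hit; the restore replays the saved dwords (or, for PCI Express, the 16-bit Device Control and Link Control halves) at the offsets located with pci_find_capability(). A sketch of the assumed shape of the earlier save loop, which this hunk does not show (error handling and any skipped registers omitted):

	int i;

	for (i = 0; i < 64; ++i)
		pci_read_config_dword(mdev->pdev, i * 4, hca_header + i);

	/* later, hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4] is the
	 * dword whose low 16 bits are the saved Device Control register */
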
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b292fefa3b411924ea29d693da19f75bf5187ffd..fab417c5cf43671f329b9b8c26dddc87bd9345ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -243,6 +243,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 	spin_lock_init(&srq->lock);
 	srq->refcount = 1;
 	init_waitqueue_head(&srq->wait);
+	mutex_init(&srq->mutex);
 
 	if (mthca_is_memfree(dev))
 		mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
@@ -371,7 +372,11 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 	if (attr_mask & IB_SRQ_LIMIT) {
 		if (attr->srq_limit > srq->max)
 			return -EINVAL;
+
+		mutex_lock(&srq->mutex);
 		ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+		mutex_unlock(&srq->mutex);
+
 		if (ret)
 			return ret;
 		if (status)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 12a1e0572ef208fd341aad093aa45d4a29f0fce0..491d2afaf5b499cc8089cd9f81d25a49d6001f1e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -272,8 +272,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
 
 void ipoib_mcast_join_task(void *dev_ptr);
-void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
-		      struct sk_buff *skb);
+void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(void *dev_ptr);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -369,15 +368,26 @@ extern int ipoib_debug_level;
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
 
 
-#define IPOIB_GID_FMT		"%x:%x:%x:%x:%x:%x:%x:%x"
-
-#define IPOIB_GID_ARG(gid)	be16_to_cpup((__be16 *) ((gid).raw +  0)), \
-				be16_to_cpup((__be16 *) ((gid).raw +  2)), \
-				be16_to_cpup((__be16 *) ((gid).raw +  4)), \
-				be16_to_cpup((__be16 *) ((gid).raw +  6)), \
-				be16_to_cpup((__be16 *) ((gid).raw +  8)), \
-				be16_to_cpup((__be16 *) ((gid).raw + 10)), \
-				be16_to_cpup((__be16 *) ((gid).raw + 12)), \
-				be16_to_cpup((__be16 *) ((gid).raw + 14))
+#define IPOIB_GID_FMT		"%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:" \
+				"%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x"
+
+#define IPOIB_GID_RAW_ARG(gid)	((u8 *)(gid))[0], \
+				((u8 *)(gid))[1], \
+				((u8 *)(gid))[2], \
+				((u8 *)(gid))[3], \
+				((u8 *)(gid))[4], \
+				((u8 *)(gid))[5], \
+				((u8 *)(gid))[6], \
+				((u8 *)(gid))[7], \
+				((u8 *)(gid))[8], \
+				((u8 *)(gid))[9], \
+				((u8 *)(gid))[10],\
+				((u8 *)(gid))[11],\
+				((u8 *)(gid))[12],\
+				((u8 *)(gid))[13],\
+				((u8 *)(gid))[14],\
+				((u8 *)(gid))[15]
+
+#define IPOIB_GID_ARG(gid)	IPOIB_GID_RAW_ARG((gid).raw)
 
 #endif /* _IPOIB_H */
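
A usage note on the reworked GID macros above: IPOIB_GID_RAW_ARG() takes a plain byte pointer, so a GID can be printed straight out of a raw hardware address without casting to union ib_gid, which is how the ipoib_main.c hunks below use it.  A minimal sketch (illustrative only, not part of the patch; "ha" is assumed to point at the 20-byte IPoIB hardware address, QPN in the first four bytes, GID in bytes 4-19):

	ipoib_dbg(priv, "dest QPN %06x GID " IPOIB_GID_FMT "\n",
		  be32_to_cpup((__be32 *) ha),
		  IPOIB_GID_RAW_ARG(ha + 4));
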
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8406839b91cf3879bff26215f08cd9addc7ab312..5033666b14817e75488b1859e7b217d41f653f8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -84,15 +84,9 @@ void ipoib_free_ah(struct kref *kref)
 
 	unsigned long flags;
 
-	if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
-		ipoib_dbg(priv, "Freeing ah %p\n", ah->ah);
-		ib_destroy_ah(ah->ah);
-		kfree(ah);
-	} else {
-		spin_lock_irqsave(&priv->lock, flags);
-		list_add_tail(&ah->list, &priv->dead_ahs);
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
+	spin_lock_irqsave(&priv->lock, flags);
+	list_add_tail(&ah->list, &priv->dead_ahs);
+	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -377,19 +371,16 @@ static void __ipoib_reap_ah(struct net_device *dev)
 	struct ipoib_ah *ah, *tah;
 	LIST_HEAD(remove_list);
 
-	spin_lock_irq(&priv->lock);
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
 	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
 		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
 			list_del(&ah->list);
-			list_add_tail(&ah->list, &remove_list);
+			ib_destroy_ah(ah->ah);
+			kfree(ah);
 		}
-	spin_unlock_irq(&priv->lock);
-
-	list_for_each_entry_safe(ah, tah, &remove_list, list) {
-		ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
-		ib_destroy_ah(ah->ah);
-		kfree(ah);
-	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 void ipoib_reap_ah(void *dev_ptr)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cb078a7d0bf5b86551adf812fbc93724883616f8..1c6ea1c682a5dc7c4bd5bed14f67307ef27aa1a1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -185,8 +185,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static struct ipoib_path *__path_find(struct net_device *dev,
-				      union ib_gid *gid)
+static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct rb_node *n = priv->path_tree.rb_node;
@@ -196,7 +195,7 @@ static struct ipoib_path *__path_find(struct net_device *dev,
 	while (n) {
 		path = rb_entry(n, struct ipoib_path, rb_node);
 
-		ret = memcmp(gid->raw, path->pathrec.dgid.raw,
+		ret = memcmp(gid, path->pathrec.dgid.raw,
 			     sizeof (union ib_gid));
 
 		if (ret < 0)
@@ -424,8 +423,7 @@ static void path_rec_completion(int status,
 	}
 }
 
-static struct ipoib_path *path_rec_create(struct net_device *dev,
-					  union ib_gid *gid)
+static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path;
@@ -440,7 +438,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev,
 
 	INIT_LIST_HEAD(&path->neigh_list);
 
-	memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
+	memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
 	path->pathrec.sgid      = priv->local_gid;
 	path->pathrec.pkey      = cpu_to_be16(priv->pkey);
 	path->pathrec.numb_path = 1;
@@ -498,10 +496,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 	 */
 	spin_lock(&priv->lock);
 
-	path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4));
+	path = __path_find(dev, skb->dst->neighbour->ha + 4);
 	if (!path) {
-		path = path_rec_create(dev,
-				       (union ib_gid *) (skb->dst->neighbour->ha + 4));
+		path = path_rec_create(dev, skb->dst->neighbour->ha + 4);
 		if (!path)
 			goto err_path;
 
@@ -551,7 +548,7 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
 	/* Add in the P_Key for multicasts */
 	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
 	skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
-	ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb);
+	ipoib_mcast_send(dev, skb->dst->neighbour->ha + 4, skb);
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -566,10 +563,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 	 */
 	spin_lock(&priv->lock);
 
-	path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4));
+	path = __path_find(dev, phdr->hwaddr + 4);
 	if (!path) {
-		path = path_rec_create(dev,
-				       (union ib_gid *) (phdr->hwaddr + 4));
+		path = path_rec_create(dev, phdr->hwaddr + 4);
 		if (path) {
 			/* put pseudoheader back on for next time */
 			skb_push(skb, sizeof *phdr);
@@ -660,7 +656,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
 			phdr->hwaddr[9] = priv->pkey & 0xff;
 
-			ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
+			ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
 		} else {
 			/* unicast GID -- should be ARP or RARP reply */
 
@@ -671,7 +667,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					   skb->dst ? "neigh" : "dst",
 					   be16_to_cpup((__be16 *) skb->data),
 					   be32_to_cpup((__be32 *) phdr->hwaddr),
-					   IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
+					   IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
 				dev_kfree_skb_any(skb);
 				++priv->stats.tx_dropped;
 				goto out;
@@ -754,7 +750,7 @@ static void ipoib_neigh_destructor(struct neighbour *n)
 	ipoib_dbg(priv,
 		  "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
 		  be32_to_cpup((__be32 *) n->ha),
-		  IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4))));
+		  IPOIB_GID_RAW_ARG(n->ha + 4));
 
 	spin_lock_irqsave(&priv->lock, flags);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 1d917edcf9ba99ea3e2de4e33700ba4e05893c53..216471fa01cc48fed536e16acf817d9b88383886 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -154,7 +154,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 	return mcast;
 }
 
-static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct rb_node *n = priv->multicast_tree.rb_node;
@@ -165,7 +165,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_g
 
 		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
 
-		ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw,
+		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
 			     sizeof (union ib_gid));
 		if (ret < 0)
 			n = n->rb_left;
@@ -694,8 +694,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	return 0;
 }
 
-void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
-		      struct sk_buff *skb)
+void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_mcast *mcast;
@@ -718,7 +717,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
 	if (!mcast) {
 		/* Let's create a new send only group now */
 		ipoib_dbg_mcast(priv, "setting up send only multicast group for "
-				IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid));
+				IPOIB_GID_FMT "\n", IPOIB_GID_RAW_ARG(mgid));
 
 		mcast = ipoib_mcast_alloc(dev, 0);
 		if (!mcast) {
@@ -730,7 +729,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
 		}
 
 		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
-		mcast->mcmember.mgid = *mgid;
+		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
 		__ipoib_mcast_add(dev, mcast);
 		list_add_tail(&mcast->list, &priv->multicast_list);
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 1d49d1643c5943246961a02c60b5acce7d8cd8f3..7b717c648f727bb52ac8d33fcb5bcb6de9cefe5e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -255,7 +255,8 @@ void ipoib_event(struct ib_event_handler *handler,
 	    record->event == IB_EVENT_PKEY_CHANGE ||
 	    record->event == IB_EVENT_PORT_ACTIVE ||
 	    record->event == IB_EVENT_LID_CHANGE  ||
-	    record->event == IB_EVENT_SM_CHANGE) {
+	    record->event == IB_EVENT_SM_CHANGE   ||
+	    record->event == IB_EVENT_CLIENT_REREGISTER) {
 		ipoib_dbg(priv, "Port state change event\n");
 		queue_work(ipoib_workqueue, &priv->flush_task);
 	}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9cbdffa08dc2bca6da821638efc0078b24792374..4e22afef72062571c12b3489bc9e26fa998fce43 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -62,6 +62,13 @@ MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
 		   "v" DRV_VERSION " (" DRV_RELDATE ")");
 MODULE_LICENSE("Dual BSD/GPL");
 
+static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE;
+static int srp_max_iu_len;
+
+module_param(srp_sg_tablesize, int, 0444);
+MODULE_PARM_DESC(srp_sg_tablesize,
+		 "Max number of gather/scatter entries per I/O (default is 12)");
+
 static int topspin_workarounds = 1;
 
 module_param(topspin_workarounds, int, 0444);
@@ -105,7 +112,8 @@ static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
 	if (!iu->buf)
 		goto out_free_iu;
 
-	iu->dma = dma_map_single(host->dev->dma_device, iu->buf, size, direction);
+	iu->dma = dma_map_single(host->dev->dev->dma_device,
+				 iu->buf, size, direction);
 	if (dma_mapping_error(iu->dma))
 		goto out_free_buf;
 
@@ -127,7 +135,8 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
 	if (!iu)
 		return;
 
-	dma_unmap_single(host->dev->dma_device, iu->dma, iu->size, iu->direction);
+	dma_unmap_single(host->dev->dev->dma_device,
+			 iu->dma, iu->size, iu->direction);
 	kfree(iu->buf);
 	kfree(iu);
 }
@@ -147,7 +156,7 @@ static int srp_init_qp(struct srp_target_port *target,
 	if (!attr)
 		return -ENOMEM;
 
-	ret = ib_find_cached_pkey(target->srp_host->dev,
+	ret = ib_find_cached_pkey(target->srp_host->dev->dev,
 				  target->srp_host->port,
 				  be16_to_cpu(target->path.pkey),
 				  &attr->pkey_index);
@@ -179,7 +188,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	if (!init_attr)
 		return -ENOMEM;
 
-	target->cq = ib_create_cq(target->srp_host->dev, srp_completion,
+	target->cq = ib_create_cq(target->srp_host->dev->dev, srp_completion,
 				  NULL, target, SRP_CQ_SIZE);
 	if (IS_ERR(target->cq)) {
 		ret = PTR_ERR(target->cq);
@@ -198,7 +207,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	init_attr->send_cq             = target->cq;
 	init_attr->recv_cq             = target->cq;
 
-	target->qp = ib_create_qp(target->srp_host->pd, init_attr);
+	target->qp = ib_create_qp(target->srp_host->dev->pd, init_attr);
 	if (IS_ERR(target->qp)) {
 		ret = PTR_ERR(target->qp);
 		ib_destroy_cq(target->cq);
@@ -250,7 +259,7 @@ static int srp_lookup_path(struct srp_target_port *target)
 
 	init_completion(&target->done);
 
-	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev,
+	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
 						   target->srp_host->port,
 						   &target->path,
 						   IB_SA_PATH_REC_DGID		|
@@ -309,10 +318,32 @@ static int srp_send_req(struct srp_target_port *target)
 
 	req->priv.opcode     	= SRP_LOGIN_REQ;
 	req->priv.tag        	= 0;
-	req->priv.req_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN);
+	req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len);
 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					      SRP_BUF_FORMAT_INDIRECT);
-	memcpy(req->priv.initiator_port_id, target->srp_host->initiator_port_id, 16);
+	/*
+	 * In the published SRP specification (draft rev. 16a), the 
+	 * port identifier format is 8 bytes of ID extension followed
+	 * by 8 bytes of GUID.  Older drafts put the two halves in the
+	 * opposite order, so that the GUID comes first.
+	 *
+	 * Targets conforming to these obsolete drafts can be
+	 * recognized by the I/O Class they report.
+	 */
+	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
+		memcpy(req->priv.initiator_port_id,
+		       target->srp_host->initiator_port_id + 8, 8);
+		memcpy(req->priv.initiator_port_id + 8,
+		       target->srp_host->initiator_port_id, 8);
+		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
+		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
+	} else {
+		memcpy(req->priv.initiator_port_id,
+		       target->srp_host->initiator_port_id, 16);
+		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
+		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
+	}
+
 	/*
 	 * Topspin/Cisco SRP targets will reject our login unless we
 	 * zero out the first 8 bytes of our initiator port ID.  The
@@ -325,8 +356,6 @@ static int srp_send_req(struct srp_target_port *target)
 		       (unsigned long long) be64_to_cpu(target->ioc_guid));
 		memset(req->priv.initiator_port_id, 0, 8);
 	}
-	memcpy(req->priv.target_port_id,     &target->id_ext, 8);
-	memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
 
 	status = ib_send_cm_req(target->cm_id, &req->param);
 
@@ -359,9 +388,9 @@ static void srp_remove_work(void *target_ptr)
 	target->state = SRP_TARGET_REMOVED;
 	spin_unlock_irq(target->scsi_host->host_lock);
 
-	mutex_lock(&target->srp_host->target_mutex);
+	spin_lock(&target->srp_host->target_lock);
 	list_del(&target->list);
-	mutex_unlock(&target->srp_host->target_mutex);
+	spin_unlock(&target->srp_host->target_lock);
 
 	scsi_remove_host(target->scsi_host);
 	ib_destroy_cm_id(target->cm_id);
@@ -421,6 +450,11 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
 		return;
 
+	if (req->fmr) {
+		ib_fmr_pool_unmap(req->fmr);
+		req->fmr = NULL;
+	}
+
 	/*
 	 * This handling of non-SG commands can be killed when the
 	 * SCSI midlayer no longer generates non-SG commands.
@@ -433,18 +467,30 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
 		scat  = &req->fake_sg;
 	}
 
-	dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents,
+	dma_unmap_sg(target->srp_host->dev->dev->dma_device, scat, nents,
 		     scmnd->sc_data_direction);
 }
 
+static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
+{
+	srp_unmap_data(req->scmnd, target, req);
+	list_move_tail(&req->list, &target->free_reqs);
+}
+
+static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
+{
+	req->scmnd->result = DID_RESET << 16;
+	req->scmnd->scsi_done(req->scmnd);
+	srp_remove_req(target, req);
+}
+
 static int srp_reconnect_target(struct srp_target_port *target)
 {
 	struct ib_cm_id *new_cm_id;
 	struct ib_qp_attr qp_attr;
-	struct srp_request *req;
+	struct srp_request *req, *tmp;
 	struct ib_wc wc;
 	int ret;
-	int i;
 
 	spin_lock_irq(target->scsi_host->host_lock);
 	if (target->state != SRP_TARGET_LIVE) {
@@ -459,7 +505,7 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	 * Now get a new local CM ID so that we avoid confusing the
 	 * target in case things are really fouled up.
 	 */
-	new_cm_id = ib_create_cm_id(target->srp_host->dev,
+	new_cm_id = ib_create_cm_id(target->srp_host->dev->dev,
 				    srp_cm_handler, target);
 	if (IS_ERR(new_cm_id)) {
 		ret = PTR_ERR(new_cm_id);
@@ -480,19 +526,12 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	while (ib_poll_cq(target->cq, 1, &wc) > 0)
 		; /* nothing */
 
-	list_for_each_entry(req, &target->req_queue, list) {
-		req->scmnd->result = DID_RESET << 16;
-		req->scmnd->scsi_done(req->scmnd);
-		srp_unmap_data(req->scmnd, target, req);
-	}
+	list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+		srp_reset_req(target, req);
 
 	target->rx_head	 = 0;
 	target->tx_head	 = 0;
 	target->tx_tail  = 0;
-	INIT_LIST_HEAD(&target->free_reqs);
-	INIT_LIST_HEAD(&target->req_queue);
-	for (i = 0; i < SRP_SQ_SIZE; ++i)
-		list_add_tail(&target->req_ring[i].list, &target->free_reqs);
 
 	ret = srp_connect_target(target);
 	if (ret)
@@ -528,14 +567,79 @@ err:
 	return ret;
 }
 
+static int srp_map_fmr(struct srp_device *dev, struct scatterlist *scat,
+		       int sg_cnt, struct srp_request *req,
+		       struct srp_direct_buf *buf)
+{
+	u64 io_addr = 0;
+	u64 *dma_pages;
+	u32 len;
+	int page_cnt;
+	int i, j;
+	int ret;
+
+	if (!dev->fmr_pool)
+		return -ENODEV;
+
+	len = page_cnt = 0;
+	for (i = 0; i < sg_cnt; ++i) {
+		if (sg_dma_address(&scat[i]) & ~dev->fmr_page_mask) {
+			if (i > 0)
+				return -EINVAL;
+			else
+				++page_cnt;
+		}
+		if ((sg_dma_address(&scat[i]) + sg_dma_len(&scat[i])) &
+		    ~dev->fmr_page_mask) {
+			if (i < sg_cnt - 1)
+				return -EINVAL;
+			else
+				++page_cnt;
+		}
+
+		len += sg_dma_len(&scat[i]);
+	}
+
+	page_cnt += len >> dev->fmr_page_shift;
+	if (page_cnt > SRP_FMR_SIZE)
+		return -ENOMEM;
+
+	dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC);
+	if (!dma_pages)
+		return -ENOMEM;
+
+	page_cnt = 0;
+	for (i = 0; i < sg_cnt; ++i)
+		for (j = 0; j < sg_dma_len(&scat[i]); j += dev->fmr_page_size)
+			dma_pages[page_cnt++] =
+				(sg_dma_address(&scat[i]) & dev->fmr_page_mask) + j;
+
+	req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool,
+					dma_pages, page_cnt, &io_addr);
+	if (IS_ERR(req->fmr)) {
+		ret = PTR_ERR(req->fmr);
+		goto out;
+	}
+
+	buf->va  = cpu_to_be64(sg_dma_address(&scat[0]) & ~dev->fmr_page_mask);
+	buf->key = cpu_to_be32(req->fmr->fmr->rkey);
+	buf->len = cpu_to_be32(len);
+
+	ret = 0;
+
+out:
+	kfree(dma_pages);
+
+	return ret;
+}
+
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 			struct srp_request *req)
 {
 	struct scatterlist *scat;
 	struct srp_cmd *cmd = req->cmd->buf;
 	int len, nents, count;
-	int i;
-	u8 fmt;
+	u8 fmt = SRP_DATA_DESC_DIRECT;
 
 	if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
 		return sizeof (struct srp_cmd);
@@ -560,53 +664,63 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 		sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
 	}
 
-	count = dma_map_sg(target->srp_host->dev->dma_device, scat, nents,
-			   scmnd->sc_data_direction);
+	count = dma_map_sg(target->srp_host->dev->dev->dma_device,
+			   scat, nents, scmnd->sc_data_direction);
+
+	fmt = SRP_DATA_DESC_DIRECT;
+	len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
 
 	if (count == 1) {
+		/*
+		 * The midlayer only generated a single gather/scatter
+		 * entry, or DMA mapping coalesced everything to a
+		 * single entry.  So a direct descriptor along with
+		 * the DMA MR suffices.
+		 */
 		struct srp_direct_buf *buf = (void *) cmd->add_data;
 
-		fmt = SRP_DATA_DESC_DIRECT;
-
 		buf->va  = cpu_to_be64(sg_dma_address(scat));
-		buf->key = cpu_to_be32(target->srp_host->mr->rkey);
+		buf->key = cpu_to_be32(target->srp_host->dev->mr->rkey);
 		buf->len = cpu_to_be32(sg_dma_len(scat));
-
-		len = sizeof (struct srp_cmd) +
-			sizeof (struct srp_direct_buf);
-	} else {
+	} else if (srp_map_fmr(target->srp_host->dev, scat, count, req,
+			       (void *) cmd->add_data)) {
+		/*
+		 * FMR mapping failed, and the scatterlist has more
+		 * than one entry.  Generate an indirect memory
+		 * descriptor.
+		 */
 		struct srp_indirect_buf *buf = (void *) cmd->add_data;
 		u32 datalen = 0;
+		int i;
 
 		fmt = SRP_DATA_DESC_INDIRECT;
+		len = sizeof (struct srp_cmd) +
+			sizeof (struct srp_indirect_buf) +
+			count * sizeof (struct srp_direct_buf);
+
+		for (i = 0; i < count; ++i) {
+			buf->desc_list[i].va  =
+				cpu_to_be64(sg_dma_address(&scat[i]));
+			buf->desc_list[i].key =
+				cpu_to_be32(target->srp_host->dev->mr->rkey);
+			buf->desc_list[i].len =
+				cpu_to_be32(sg_dma_len(&scat[i]));
+			datalen += sg_dma_len(&scat[i]);
+		}
 
 		if (scmnd->sc_data_direction == DMA_TO_DEVICE)
 			cmd->data_out_desc_cnt = count;
 		else
 			cmd->data_in_desc_cnt = count;
 
-		buf->table_desc.va  = cpu_to_be64(req->cmd->dma +
-						  sizeof *cmd +
-						  sizeof *buf);
+		buf->table_desc.va  =
+			cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf);
 		buf->table_desc.key =
-			cpu_to_be32(target->srp_host->mr->rkey);
+			cpu_to_be32(target->srp_host->dev->mr->rkey);
 		buf->table_desc.len =
 			cpu_to_be32(count * sizeof (struct srp_direct_buf));
 
-		for (i = 0; i < count; ++i) {
-			buf->desc_list[i].va  = cpu_to_be64(sg_dma_address(&scat[i]));
-			buf->desc_list[i].key =
-				cpu_to_be32(target->srp_host->mr->rkey);
-			buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
-
-			datalen += sg_dma_len(&scat[i]);
-		}
-
 		buf->len = cpu_to_be32(datalen);
-
-		len = sizeof (struct srp_cmd) +
-			sizeof (struct srp_indirect_buf) +
-			count * sizeof (struct srp_direct_buf);
 	}
 
 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
@@ -617,12 +731,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 	return len;
 }
 
-static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
-{
-	srp_unmap_data(req->scmnd, target, req);
-	list_move_tail(&req->list, &target->free_reqs);
-}
-
 static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
 {
 	struct srp_request *req;
@@ -689,7 +797,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 
 	iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV];
 
-	dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma,
+	dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
 				target->max_ti_iu_len, DMA_FROM_DEVICE);
 
 	opcode = *(u8 *) iu->buf;
@@ -726,7 +834,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 		break;
 	}
 
-	dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma,
+	dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
 				   target->max_ti_iu_len, DMA_FROM_DEVICE);
 }
 
@@ -770,7 +878,7 @@ static int __srp_post_recv(struct srp_target_port *target)
 
 	list.addr   = iu->dma;
 	list.length = iu->size;
-	list.lkey   = target->srp_host->mr->lkey;
+	list.lkey   = target->srp_host->dev->mr->lkey;
 
 	wr.next     = NULL;
 	wr.sg_list  = &list;
@@ -805,12 +913,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target)
 	if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
 		return NULL;
 
-	if (unlikely(target->req_lim < 1)) {
-		if (printk_ratelimit())
-			printk(KERN_DEBUG PFX "Target has req_lim %d\n",
-			       target->req_lim);
-		return NULL;
-	}
+	if (unlikely(target->req_lim < 1))
+		++target->zero_req_lim;
 
 	return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
 }
@@ -828,7 +932,7 @@ static int __srp_post_send(struct srp_target_port *target,
 
 	list.addr   = iu->dma;
 	list.length = len;
-	list.lkey   = target->srp_host->mr->lkey;
+	list.lkey   = target->srp_host->dev->mr->lkey;
 
 	wr.next       = NULL;
 	wr.wr_id      = target->tx_head & SRP_SQ_SIZE;
@@ -870,8 +974,8 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
 	if (!iu)
 		goto err;
 
-	dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma,
-				SRP_MAX_IU_LEN, DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
+				srp_max_iu_len, DMA_TO_DEVICE);
 
 	req = list_entry(target->free_reqs.next, struct srp_request, list);
 
@@ -903,8 +1007,8 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
 		goto err_unmap;
 	}
 
-	dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma,
-				   SRP_MAX_IU_LEN, DMA_TO_DEVICE);
+	dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
+				   srp_max_iu_len, DMA_TO_DEVICE);
 
 	if (__srp_post_send(target, iu, len)) {
 		printk(KERN_ERR PFX "Send failed\n");
@@ -936,7 +1040,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
 
 	for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
 		target->tx_ring[i] = srp_alloc_iu(target->srp_host,
-						  SRP_MAX_IU_LEN,
+						  srp_max_iu_len,
 						  GFP_KERNEL, DMA_TO_DEVICE);
 		if (!target->tx_ring[i])
 			goto err;
@@ -1107,11 +1211,10 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 		srp_cm_rej_handler(cm_id, event, target);
 		break;
 
-	case IB_CM_MRA_RECEIVED:
-		printk(KERN_ERR PFX "MRA received\n");
-		break;
-
-	case IB_CM_DREP_RECEIVED:
+	case IB_CM_DREQ_RECEIVED:
+		printk(KERN_WARNING PFX "DREQ received - connection closed\n");
+		if (ib_send_cm_drep(cm_id, NULL, 0))
+			printk(KERN_ERR PFX "Sending CM DREP failed\n");
 		break;
 
 	case IB_CM_TIMEWAIT_EXIT:
@@ -1121,6 +1224,11 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 		target->status = 0;
 		break;
 
+	case IB_CM_MRA_RECEIVED:
+	case IB_CM_DREQ_ERROR:
+	case IB_CM_DREP_RECEIVED:
+		break;
+
 	default:
 		printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event);
 		break;
@@ -1239,11 +1347,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 	spin_lock_irq(target->scsi_host->host_lock);
 
 	list_for_each_entry_safe(req, tmp, &target->req_queue, list)
-		if (req->scmnd->device == scmnd->device) {
-			req->scmnd->result = DID_RESET << 16;
-			req->scmnd->scsi_done(req->scmnd);
-			srp_remove_req(target, req);
-		}
+		if (req->scmnd->device == scmnd->device)
+			srp_reset_req(target, req);
 
 	spin_unlock_irq(target->scsi_host->host_lock);
 
@@ -1329,11 +1434,23 @@ static ssize_t show_dgid(struct class_device *cdev, char *buf)
 		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[7]));
 }
 
+static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", target->zero_req_lim);
+}
+
 static CLASS_DEVICE_ATTR(id_ext,	S_IRUGO, show_id_ext,		NULL);
 static CLASS_DEVICE_ATTR(ioc_guid,	S_IRUGO, show_ioc_guid,		NULL);
 static CLASS_DEVICE_ATTR(service_id,	S_IRUGO, show_service_id,	NULL);
 static CLASS_DEVICE_ATTR(pkey,		S_IRUGO, show_pkey,		NULL);
 static CLASS_DEVICE_ATTR(dgid,		S_IRUGO, show_dgid,		NULL);
+static CLASS_DEVICE_ATTR(zero_req_lim,	S_IRUGO, show_zero_req_lim,	NULL);
 
 static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_id_ext,
@@ -1341,6 +1458,7 @@ static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_service_id,
 	&class_device_attr_pkey,
 	&class_device_attr_dgid,
+	&class_device_attr_zero_req_lim,
 	NULL
 };
 
@@ -1354,7 +1472,6 @@ static struct scsi_host_template srp_template = {
 	.eh_host_reset_handler		= srp_reset_host,
 	.can_queue			= SRP_SQ_SIZE,
 	.this_id			= -1,
-	.sg_tablesize			= SRP_MAX_INDIRECT,
 	.cmd_per_lun			= SRP_SQ_SIZE,
 	.use_clustering			= ENABLE_CLUSTERING,
 	.shost_attrs			= srp_host_attrs
@@ -1365,18 +1482,17 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
 	sprintf(target->target_name, "SRP.T10:%016llX",
 		 (unsigned long long) be64_to_cpu(target->id_ext));
 
-	if (scsi_add_host(target->scsi_host, host->dev->dma_device))
+	if (scsi_add_host(target->scsi_host, host->dev->dev->dma_device))
 		return -ENODEV;
 
-	mutex_lock(&host->target_mutex);
+	spin_lock(&host->target_lock);
 	list_add_tail(&target->list, &host->target_list);
-	mutex_unlock(&host->target_mutex);
+	spin_unlock(&host->target_lock);
 
 	target->state = SRP_TARGET_LIVE;
 
-	/* XXX: are we supposed to have a definition of SCAN_WILD_CARD ?? */
 	scsi_scan_target(&target->scsi_host->shost_gendev,
-			 0, target->scsi_id, ~0, 0);
+			 0, target->scsi_id, SCAN_WILD_CARD, 0);
 
 	return 0;
 }
@@ -1410,6 +1526,8 @@ enum {
 	SRP_OPT_PKEY		= 1 << 3,
 	SRP_OPT_SERVICE_ID	= 1 << 4,
 	SRP_OPT_MAX_SECT	= 1 << 5,
+	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
+	SRP_OPT_IO_CLASS	= 1 << 7,
 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
 				   SRP_OPT_IOC_GUID	|
 				   SRP_OPT_DGID		|
@@ -1418,13 +1536,15 @@ enum {
 };
 
 static match_table_t srp_opt_tokens = {
-	{ SRP_OPT_ID_EXT,	"id_ext=%s" 	},
-	{ SRP_OPT_IOC_GUID,	"ioc_guid=%s" 	},
-	{ SRP_OPT_DGID,		"dgid=%s" 	},
-	{ SRP_OPT_PKEY,		"pkey=%x" 	},
-	{ SRP_OPT_SERVICE_ID,	"service_id=%s" },
-	{ SRP_OPT_MAX_SECT,     "max_sect=%d" 	},
-	{ SRP_OPT_ERR,		NULL 		}
+	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
+	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
+	{ SRP_OPT_DGID,			"dgid=%s" 		},
+	{ SRP_OPT_PKEY,			"pkey=%x" 		},
+	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
+	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
+	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
+	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
+	{ SRP_OPT_ERR,			NULL 			}
 };
 
 static int srp_parse_options(const char *buf, struct srp_target_port *target)
@@ -1500,6 +1620,29 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 			target->scsi_host->max_sectors = token;
 			break;
 
+		case SRP_OPT_MAX_CMD_PER_LUN:
+			if (match_int(args, &token)) {
+				printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p);
+				goto out;
+			}
+			target->scsi_host->cmd_per_lun = min(token, SRP_SQ_SIZE);
+			break;
+
+		case SRP_OPT_IO_CLASS:
+			if (match_hex(args, &token)) {
+				printk(KERN_WARNING PFX "bad IO class parameter '%s'\n", p);
+				goto out;
+			}
+			if (token != SRP_REV10_IB_IO_CLASS &&
+			    token != SRP_REV16A_IB_IO_CLASS) {
+				printk(KERN_WARNING PFX "unknown IO class parameter value"
+				       " %x specified (use %x or %x).\n",
+				       token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS);
+				goto out;
+			}
+			target->io_class = token;
+			break;
+
 		default:
 			printk(KERN_WARNING PFX "unknown parameter or missing value "
 			       "'%s' in target creation request\n", p);
@@ -1542,6 +1685,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
 	target = host_to_target(target_host);
 	memset(target, 0, sizeof *target);
 
+	target->io_class   = SRP_REV16A_IB_IO_CLASS;
 	target->scsi_host  = target_host;
 	target->srp_host   = host;
 
@@ -1558,7 +1702,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
 	if (ret)
 		goto err;
 
-	ib_get_cached_gid(host->dev, host->port, 0, &target->path.sgid);
+	ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid);
 
 	printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
 	       "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
@@ -1579,7 +1723,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
 	if (ret)
 		goto err;
 
-	target->cm_id = ib_create_cm_id(host->dev, srp_cm_handler, target);
+	target->cm_id = ib_create_cm_id(host->dev->dev, srp_cm_handler, target);
 	if (IS_ERR(target->cm_id)) {
 		ret = PTR_ERR(target->cm_id);
 		goto err_free;
@@ -1619,7 +1763,7 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 	struct srp_host *host =
 		container_of(class_dev, struct srp_host, class_dev);
 
-	return sprintf(buf, "%s\n", host->dev->name);
+	return sprintf(buf, "%s\n", host->dev->dev->name);
 }
 
 static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -1634,7 +1778,7 @@ static ssize_t show_port(struct class_device *class_dev, char *buf)
 
 static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
 
-static struct srp_host *srp_add_port(struct ib_device *device, u8 port)
+static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
 {
 	struct srp_host *host;
 
@@ -1643,32 +1787,21 @@ static struct srp_host *srp_add_port(struct ib_device *device, u8 port)
 		return NULL;
 
 	INIT_LIST_HEAD(&host->target_list);
-	mutex_init(&host->target_mutex);
+	spin_lock_init(&host->target_lock);
 	init_completion(&host->released);
 	host->dev  = device;
 	host->port = port;
 
 	host->initiator_port_id[7] = port;
-	memcpy(host->initiator_port_id + 8, &device->node_guid, 8);
-
-	host->pd   = ib_alloc_pd(device);
-	if (IS_ERR(host->pd))
-		goto err_free;
-
-	host->mr   = ib_get_dma_mr(host->pd,
-				   IB_ACCESS_LOCAL_WRITE |
-				   IB_ACCESS_REMOTE_READ |
-				   IB_ACCESS_REMOTE_WRITE);
-	if (IS_ERR(host->mr))
-		goto err_pd;
+	memcpy(host->initiator_port_id + 8, &device->dev->node_guid, 8);
 
 	host->class_dev.class = &srp_class;
-	host->class_dev.dev   = device->dma_device;
+	host->class_dev.dev   = device->dev->dma_device;
 	snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d",
-		 device->name, port);
+		 device->dev->name, port);
 
 	if (class_device_register(&host->class_dev))
-		goto err_mr;
+		goto free_host;
 	if (class_device_create_file(&host->class_dev, &class_device_attr_add_target))
 		goto err_class;
 	if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev))
@@ -1681,13 +1814,7 @@ static struct srp_host *srp_add_port(struct ib_device *device, u8 port)
 err_class:
 	class_device_unregister(&host->class_dev);
 
-err_mr:
-	ib_dereg_mr(host->mr);
-
-err_pd:
-	ib_dealloc_pd(host->pd);
-
-err_free:
+free_host:
 	kfree(host);
 
 	return NULL;
@@ -1695,15 +1822,62 @@ err_free:
 
 static void srp_add_one(struct ib_device *device)
 {
-	struct list_head *dev_list;
+	struct srp_device *srp_dev;
+	struct ib_device_attr *dev_attr;
+	struct ib_fmr_pool_param fmr_param;
 	struct srp_host *host;
 	int s, e, p;
 
-	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
-	if (!dev_list)
+	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
+	if (!dev_attr)
 		return;
 
-	INIT_LIST_HEAD(dev_list);
+	if (ib_query_device(device, dev_attr)) {
+		printk(KERN_WARNING PFX "Query device failed for %s\n",
+		       device->name);
+		goto free_attr;
+	}
+
+	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
+	if (!srp_dev)
+		goto free_attr;
+
+	/*
+	 * Use the smallest page size supported by the HCA, down to a
+	 * minimum of 512 bytes (which is the smallest sector that a
+	 * SCSI command will ever carry).
+	 */
+	srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
+	srp_dev->fmr_page_size  = 1 << srp_dev->fmr_page_shift;
+	srp_dev->fmr_page_mask  = ~((unsigned long) srp_dev->fmr_page_size - 1);
+
+	INIT_LIST_HEAD(&srp_dev->dev_list);
+
+	srp_dev->dev = device;
+	srp_dev->pd  = ib_alloc_pd(device);
+	if (IS_ERR(srp_dev->pd))
+		goto free_dev;
+
+	srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
+				    IB_ACCESS_LOCAL_WRITE |
+				    IB_ACCESS_REMOTE_READ |
+				    IB_ACCESS_REMOTE_WRITE);
+	if (IS_ERR(srp_dev->mr))
+		goto err_pd;
+
+	memset(&fmr_param, 0, sizeof fmr_param);
+	fmr_param.pool_size	    = SRP_FMR_POOL_SIZE;
+	fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE;
+	fmr_param.cache		    = 1;
+	fmr_param.max_pages_per_fmr = SRP_FMR_SIZE;
+	fmr_param.page_shift	    = srp_dev->fmr_page_shift;
+	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
+				       IB_ACCESS_REMOTE_WRITE |
+				       IB_ACCESS_REMOTE_READ);
+
+	srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
+	if (IS_ERR(srp_dev->fmr_pool))
+		srp_dev->fmr_pool = NULL;
 
 	if (device->node_type == IB_NODE_SWITCH) {
 		s = 0;
@@ -1714,25 +1888,35 @@ static void srp_add_one(struct ib_device *device)
 	}
 
 	for (p = s; p <= e; ++p) {
-		host = srp_add_port(device, p);
+		host = srp_add_port(srp_dev, p);
 		if (host)
-			list_add_tail(&host->list, dev_list);
+			list_add_tail(&host->list, &srp_dev->dev_list);
 	}
 
-	ib_set_client_data(device, &srp_client, dev_list);
+	ib_set_client_data(device, &srp_client, srp_dev);
+
+	goto free_attr;
+
+err_pd:
+	ib_dealloc_pd(srp_dev->pd);
+
+free_dev:
+	kfree(srp_dev);
+
+free_attr:
+	kfree(dev_attr);
 }
 
 static void srp_remove_one(struct ib_device *device)
 {
-	struct list_head *dev_list;
+	struct srp_device *srp_dev;
 	struct srp_host *host, *tmp_host;
 	LIST_HEAD(target_list);
 	struct srp_target_port *target, *tmp_target;
-	unsigned long flags;
 
-	dev_list = ib_get_client_data(device, &srp_client);
+	srp_dev = ib_get_client_data(device, &srp_client);
 
-	list_for_each_entry_safe(host, tmp_host, dev_list, list) {
+	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
 		class_device_unregister(&host->class_dev);
 		/*
 		 * Wait for the sysfs entry to go away, so that no new
@@ -1744,15 +1928,13 @@ static void srp_remove_one(struct ib_device *device)
 		 * Mark all target ports as removed, so we stop queueing
 		 * commands and don't try to reconnect.
 		 */
-		mutex_lock(&host->target_mutex);
-		list_for_each_entry_safe(target, tmp_target,
-					 &host->target_list, list) {
-			spin_lock_irqsave(target->scsi_host->host_lock, flags);
-			if (target->state != SRP_TARGET_REMOVED)
-				target->state = SRP_TARGET_REMOVED;
-			spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+		spin_lock(&host->target_lock);
+		list_for_each_entry(target, &host->target_list, list) {
+			spin_lock_irq(target->scsi_host->host_lock);
+			target->state = SRP_TARGET_REMOVED;
+			spin_unlock_irq(target->scsi_host->host_lock);
 		}
-		mutex_unlock(&host->target_mutex);
+		spin_unlock(&host->target_lock);
 
 		/*
 		 * Wait for any reconnection tasks that may have
@@ -1770,18 +1952,26 @@ static void srp_remove_one(struct ib_device *device)
 			scsi_host_put(target->scsi_host);
 		}
 
-		ib_dereg_mr(host->mr);
-		ib_dealloc_pd(host->pd);
 		kfree(host);
 	}
 
-	kfree(dev_list);
+	if (srp_dev->fmr_pool)
+		ib_destroy_fmr_pool(srp_dev->fmr_pool);
+	ib_dereg_mr(srp_dev->mr);
+	ib_dealloc_pd(srp_dev->pd);
+
+	kfree(srp_dev);
 }
 
 static int __init srp_init_module(void)
 {
 	int ret;
 
+	srp_template.sg_tablesize = srp_sg_tablesize;
+	srp_max_iu_len = (sizeof (struct srp_cmd) +
+			  sizeof (struct srp_indirect_buf) +
+			  srp_sg_tablesize * 16);
+
 	ret = class_register(&srp_class);
 	if (ret) {
 		printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
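
A worked example of the srp_max_iu_len computation just above (illustrative numbers, using the module defaults): each scatter/gather entry costs one 16-byte struct srp_direct_buf in the indirect descriptor, so with the default srp_sg_tablesize of 12 the IU must hold

	srp_max_iu_len = sizeof (struct srp_cmd)
		       + sizeof (struct srp_indirect_buf)
		       + 12 * 16	/* 192 bytes of direct descriptors */

Loading the module with a larger table, for example srp_sg_tablesize=32, simply grows the descriptor area to 512 bytes and sets the SCSI host template's sg_tablesize to match.
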
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index c5cd43aae8604af42fb349b160e608a10a84c16d..5b581fb8eb0d08addeea3f9a5fff4763592f1ed6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -46,6 +46,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_sa.h>
 #include <rdma/ib_cm.h>
+#include <rdma/ib_fmr_pool.h>
 
 enum {
 	SRP_PATH_REC_TIMEOUT_MS	= 1000,
@@ -55,20 +56,21 @@ enum {
 	SRP_DLID_REDIRECT	= 2,
 
 	SRP_MAX_LUN		= 512,
-	SRP_MAX_IU_LEN		= 256,
+	SRP_DEF_SG_TABLESIZE	= 12,
 
 	SRP_RQ_SHIFT    	= 6,
 	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
 	SRP_SQ_SIZE		= SRP_RQ_SIZE - 1,
 	SRP_CQ_SIZE		= SRP_SQ_SIZE + SRP_RQ_SIZE,
 
-	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1)
+	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1),
+
+	SRP_FMR_SIZE		= 256,
+	SRP_FMR_POOL_SIZE	= 1024,
+	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4
 };
 
 #define SRP_OP_RECV		(1 << 31)
-#define SRP_MAX_INDIRECT	((SRP_MAX_IU_LEN -			\
-				  sizeof (struct srp_cmd) -		\
-				  sizeof (struct srp_indirect_buf)) / 16)
 
 enum srp_target_state {
 	SRP_TARGET_LIVE,
@@ -77,15 +79,24 @@ enum srp_target_state {
 	SRP_TARGET_REMOVED
 };
 
-struct srp_host {
-	u8			initiator_port_id[16];
+struct srp_device {
+	struct list_head	dev_list;
 	struct ib_device       *dev;
-	u8                      port;
 	struct ib_pd	       *pd;
 	struct ib_mr	       *mr;
+	struct ib_fmr_pool     *fmr_pool;
+	int			fmr_page_shift;
+	int			fmr_page_size;
+	unsigned long		fmr_page_mask;
+};
+
+struct srp_host {
+	u8			initiator_port_id[16];
+	struct srp_device      *dev;
+	u8			port;
 	struct class_device	class_dev;
 	struct list_head	target_list;
-	struct mutex            target_mutex;
+	spinlock_t		target_lock;
 	struct completion	released;
 	struct list_head	list;
 };
@@ -95,6 +106,7 @@ struct srp_request {
 	struct scsi_cmnd       *scmnd;
 	struct srp_iu	       *cmd;
 	struct srp_iu	       *tsk_mgmt;
+	struct ib_pool_fmr     *fmr;
 	/*
 	 * Fake scatterlist used when scmnd->use_sg==0.  Can be killed
 	 * when the SCSI midlayer no longer generates non-SG commands.
@@ -110,6 +122,7 @@ struct srp_target_port {
 	__be64			id_ext;
 	__be64			ioc_guid;
 	__be64			service_id;
+	u16			io_class;
 	struct srp_host	       *srp_host;
 	struct Scsi_Host       *scsi_host;
 	char			target_name[32];
@@ -126,6 +139,8 @@ struct srp_target_port {
 	int			max_ti_iu_len;
 	s32			req_lim;
 
+	int			zero_req_lim;
+
 	unsigned		rx_head;
 	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
 
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
new file mode 100644
index 0000000000000000000000000000000000000000..fcb5ba87dcc5f76263c30c8293cb76ebbe9f2449
--- /dev/null
+++ b/include/rdma/ib_addr.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(IB_ADDR_H)
+#define IB_ADDR_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <rdma/ib_verbs.h>
+
+struct rdma_dev_addr {
+	unsigned char src_dev_addr[MAX_ADDR_LEN];
+	unsigned char dst_dev_addr[MAX_ADDR_LEN];
+	unsigned char broadcast[MAX_ADDR_LEN];
+	enum ib_node_type dev_type;
+};
+
+/**
+ * rdma_translate_ip - Translate a local IP address to an RDMA hardware
+ *   address.
+ */
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
+
+/**
+ * rdma_resolve_ip - Resolve source and destination IP addresses to
+ *   RDMA hardware addresses.
+ * @src_addr: An optional source address to use in the resolution.  If a
+ *   source address is not provided, a usable address will be returned via
+ *   the callback.
+ * @dst_addr: The destination address to resolve.
+ * @addr: A reference to a data location that will receive the resolved
+ *   addresses.  The data location must remain valid until the callback has
+ *   been invoked.
+ * @timeout_ms: Amount of time to wait for the address resolution to complete.
+ * @callback: Callback invoked once address resolution has completed, timed out,
+ *   or been canceled.  A status of 0 indicates success.
+ * @context: User-specified context associated with the call.
+ */
+int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+		    struct rdma_dev_addr *addr, int timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct rdma_dev_addr *addr, void *context),
+		    void *context);
+
+void rdma_addr_cancel(struct rdma_dev_addr *addr);
+
+static inline int ip_addr_size(struct sockaddr *addr)
+{
+	return addr->sa_family == AF_INET6 ?
+	       sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
+}
+
+static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
+{
+	return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
+}
+
+static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey)
+{
+	dev_addr->broadcast[8] = pkey >> 8;
+	dev_addr->broadcast[9] = (unsigned char) pkey;
+}
+
+static inline union ib_gid *ib_addr_get_sgid(struct rdma_dev_addr *dev_addr)
+{
+	return (union ib_gid *) (dev_addr->src_dev_addr + 4);
+}
+
+static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
+}
+
+static inline union ib_gid *ib_addr_get_dgid(struct rdma_dev_addr *dev_addr)
+{
+	return (union ib_gid *) (dev_addr->dst_dev_addr + 4);
+}
+
+static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
+}
+
+#endif /* IB_ADDR_H */
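
Since rdma_resolve_ip() is asynchronous, the struct rdma_dev_addr passed in must stay valid until the callback fires, and the result is only known inside that callback.  A minimal consumer sketch (illustrative only; the handler name and the 2000 ms timeout are made up for the example):

	static void my_addr_handler(int status, struct sockaddr *src_addr,
				    struct rdma_dev_addr *dev_addr, void *context)
	{
		if (status)
			return;		/* resolution failed, timed out or was canceled */

		/*
		 * dev_addr->dst_dev_addr now holds the destination hardware
		 * address; for an IB port the destination GID can be read
		 * with ib_addr_get_dgid(dev_addr).
		 */
	}

	static int my_resolve(struct sockaddr *dst, struct rdma_dev_addr *dev_addr)
	{
		/* NULL source address: let the core pick a usable local one. */
		return rdma_resolve_ip(NULL, dst, dev_addr, 2000,
				       my_addr_handler, NULL);
	}
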
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 5bf9834f7dcae588f126a8800f6685654e1bb881..f179d233ffc34f22aa02ee2d928d74c691566938 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -102,4 +102,17 @@ int ib_find_cached_pkey(struct ib_device    *device,
 			u16                  pkey,
 			u16                 *index);
 
+/**
+ * ib_get_cached_lmc - Returns a cached lmc table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @lmc: The lmc value for the specified port for that device.
+ *
+ * ib_get_cached_lmc() fetches the specified lmc table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_lmc(struct ib_device *device,
+		      u8                port_num,
+		      u8                *lmc);
+
 #endif /* _IB_CACHE_H */
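
A short usage sketch for the new cache query (illustrative; it assumes the usual convention that a zero return means success and that *lmc is then valid):

	u8 lmc;

	if (!ib_get_cached_lmc(device, port_num, &lmc))
		printk(KERN_DEBUG "port %d has LMC %d\n", port_num, lmc);
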
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 0a9fcd59eb430966031b7d2aa4bde4b0a94ffb1f..c9b4738be9d68021067b8911c68713362d2d0c9d 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 #if !defined(IB_CM_H)
 #define IB_CM_H
@@ -102,7 +102,8 @@ enum ib_cm_data_size {
 	IB_CM_APR_INFO_LENGTH		 = 72,
 	IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
 	IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
-	IB_CM_SIDR_REP_INFO_LENGTH	 = 72
+	IB_CM_SIDR_REP_INFO_LENGTH	 = 72,
+	IB_CM_COMPARE_SIZE		 = 64
 };
 
 struct ib_cm_id;
@@ -238,7 +239,6 @@ struct ib_cm_sidr_rep_event_param {
 	u32			qpn;
 	void			*info;
 	u8			info_len;
-
 };
 
 struct ib_cm_event {
@@ -317,6 +317,15 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
 
 #define IB_SERVICE_ID_AGN_MASK	__constant_cpu_to_be64(0xFF00000000000000ULL)
 #define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL)
+#define IB_CMA_SERVICE_ID	__constant_cpu_to_be64(0x0000000001000000ULL)
+#define IB_CMA_SERVICE_ID_MASK	__constant_cpu_to_be64(0xFFFFFFFFFF000000ULL)
+#define IB_SDP_SERVICE_ID	__constant_cpu_to_be64(0x0000000000010000ULL)
+#define IB_SDP_SERVICE_ID_MASK	__constant_cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
+
+struct ib_cm_compare_data {
+	u8  data[IB_CM_COMPARE_SIZE];
+	u8  mask[IB_CM_COMPARE_SIZE];
+};
 
 /**
  * ib_cm_listen - Initiates listening on the specified service ID for
@@ -330,10 +339,12 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
  *   range of service IDs.  If set to 0, the service ID is matched
  *   exactly.  This parameter is ignored if %service_id is set to
  *   IB_CM_ASSIGN_SERVICE_ID.
+ * @compare_data: This parameter is optional.  It specifies data that must
+ *   appear in the private data of a connection request for the specified
+ *   listen request.
  */
-int ib_cm_listen(struct ib_cm_id *cm_id,
-		 __be64 service_id,
-		 __be64 service_mask);
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_compare_data *compare_data);
 
 struct ib_cm_req_param {
 	struct ib_sa_path_rec	*primary_path;
@@ -535,7 +546,6 @@ struct ib_cm_sidr_req_param {
 	const void		*private_data;
 	u8			private_data_len;
 	u8			max_cm_retries;
-	u16			pkey;
 };
 
 /**
@@ -559,7 +569,7 @@ struct ib_cm_sidr_rep_param {
 };
 
 /**
- * ib_send_cm_sidr_rep - Sends a service ID resolution request to the
+ * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the
  *   remote node.
  * @cm_id: Communication identifier associated with the received service ID
  *   resolution request.
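
With the extended ib_cm_listen() above, a listener can be matched not only on a service ID range but also on bytes of the REQ private data; the new IB_CMA_* and IB_SDP_* constants carve out the service ID ranges that the RDMA CM and SDP listeners are presumably meant to use.  A hedged sketch of a listen that additionally requires the first private data byte to carry a made-up protocol tag:

	struct ib_cm_compare_data cmp;
	int ret;

	memset(&cmp, 0, sizeof cmp);
	cmp.data[0] = 0x01;	/* hypothetical tag the REQ private data must carry */
	cmp.mask[0] = 0xff;	/* compare only the first byte */

	ret = ib_cm_listen(cm_id, IB_CMA_SERVICE_ID, IB_CMA_SERVICE_ID_MASK, &cmp);
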
diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h
new file mode 100644
index 0000000000000000000000000000000000000000..66bf4d7d0dfb9a77c3c60569d56b94f97a1c59ac
--- /dev/null
+++ b/include/rdma/ib_marshall.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_USER_MARSHALL_H)
+#define IB_USER_MARSHALL_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_sa.h>
+
+void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+			     struct ib_qp_attr *src);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+			      struct ib_sa_path_rec *src);
+
+void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+				struct ib_user_path_rec *src);
+
+#endif /* IB_USER_MARSHALL_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index ad63c215efe5b68be3d555fccda212ff09a3358a..c99e4420fd7ec308d9d2826c0bdd0493312bc5dd 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -370,5 +370,12 @@ ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
 					context, query);
 }
 
+/**
+ * ib_init_ah_from_path - Initialize address handle attributes based on an SA
+ *   path record.
+ */
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec,
+			 struct ib_ah_attr *ah_attr);
 
 #endif /* IB_SA_H */
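
ib_init_ah_from_path() saves callers from hand-copying path record fields into an ib_ah_attr.  A minimal sketch of how a path query completion handler might use it (illustrative only; "rec" is assumed to be a path record returned by a successful ib_sa_path_rec_get() query):

	struct ib_ah_attr ah_attr;
	struct ib_ah *ah;

	if (ib_init_ah_from_path(device, port_num, rec, &ah_attr))
		return;			/* path record could not be translated */

	ah = ib_create_ah(pd, &ah_attr);
	if (IS_ERR(ah))
		return;			/* address handle allocation failed */
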
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index 87f60737f69575e2b86426f3cbaa592638dca151..f29af135ba833c841926e33b6d9bac0de42693e9 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -85,6 +85,42 @@ struct ib_smp {
 #define IB_SMP_ATTR_LED_INFO			__constant_htons(0x0031)
 #define IB_SMP_ATTR_VENDOR_MASK			__constant_htons(0xFF00)
 
+struct ib_port_info {
+	__be64 mkey;
+	__be64 gid_prefix;
+	__be16 lid;
+	__be16 sm_lid;
+	__be32 cap_mask;
+	__be16 diag_code;
+	__be16 mkey_lease_period;
+	u8 local_port_num;
+	u8 link_width_enabled;
+	u8 link_width_supported;
+	u8 link_width_active;
+	u8 linkspeed_portstate;			/* 4 bits, 4 bits */
+	u8 portphysstate_linkdown;		/* 4 bits, 4 bits */
+	u8 mkeyprot_resv_lmc;			/* 2 bits, 3, 3 */
+	u8 linkspeedactive_enabled;		/* 4 bits, 4 bits */
+	u8 neighbormtu_mastersmsl;		/* 4 bits, 4 bits */
+	u8 vlcap_inittype;			/* 4 bits, 4 bits */
+	u8 vl_high_limit;
+	u8 vl_arb_high_cap;
+	u8 vl_arb_low_cap;
+	u8 inittypereply_mtucap;		/* 4 bits, 4 bits */
+	u8 vlstallcnt_hoqlife;			/* 3 bits, 5 bits */
+	u8 operationalvl_pei_peo_fpi_fpo;	/* 4 bits, 1, 1, 1, 1 */
+	__be16 mkey_violations;
+	__be16 pkey_violations;
+	__be16 qkey_violations;
+	u8 guid_cap;
+	u8 clientrereg_resv_subnetto;		/* 1 bit, 2 bits, 5 */
+	u8 resv_resptimevalue;			/* 3 bits, 5 bits */
+	u8 localphyerrors_overrunerrors;	/* 4 bits, 4 bits */
+	__be16 max_credit_hint;
+	u8 resv;
+	u8 link_roundtrip_latency[3];
+};
+
 static inline u8
 ib_get_smp_direction(struct ib_smp *smp)
 {
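
The bit-packed bytes in struct ib_port_info are laid out most-significant field first, as the "n bits, m bits" comments indicate.  For example (illustrative decoding based on those comments): the LMC that, e.g., ib_get_cached_lmc() reports sits in the low three bits of mkeyprot_resv_lmc, and the ClientReregister bit that ultimately shows up as IB_EVENT_CLIENT_REREGISTER is the top bit of clientrereg_resv_subnetto:

	u8 lmc            = port_info->mkeyprot_resv_lmc & 0x07;
	u8 client_rereg   = !!(port_info->clientrereg_resv_subnetto & 0x80);
	u8 subnet_timeout = port_info->clientrereg_resv_subnetto & 0x1f;
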
diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h
index 19be116047f676d89b4f336cf7ab30f55e49a7eb..066c20b7cdfbf08019a3f23c7b17e93e4371e49b 100644
--- a/include/rdma/ib_user_cm.h
+++ b/include/rdma/ib_user_cm.h
@@ -30,13 +30,13 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $
+ * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_CM_H
 #define IB_USER_CM_H
 
-#include <linux/types.h>
+#include <rdma/ib_user_sa.h>
 
 #define IB_USER_CM_ABI_VERSION 4
 
@@ -110,58 +110,6 @@ struct ib_ucm_init_qp_attr {
 	__u32 qp_state;
 };
 
-struct ib_ucm_ah_attr {
-	__u8	grh_dgid[16];
-	__u32	grh_flow_label;
-	__u16	dlid;
-	__u16	reserved;
-	__u8	grh_sgid_index;
-	__u8	grh_hop_limit;
-	__u8	grh_traffic_class;
-	__u8	sl;
-	__u8	src_path_bits;
-	__u8	static_rate;
-	__u8	is_global;
-	__u8	port_num;
-};
-
-struct ib_ucm_init_qp_attr_resp {
-	__u32	qp_attr_mask;
-	__u32	qp_state;
-	__u32	cur_qp_state;
-	__u32	path_mtu;
-	__u32	path_mig_state;
-	__u32	qkey;
-	__u32	rq_psn;
-	__u32	sq_psn;
-	__u32	dest_qp_num;
-	__u32	qp_access_flags;
-
-	struct ib_ucm_ah_attr	ah_attr;
-	struct ib_ucm_ah_attr	alt_ah_attr;
-
-	/* ib_qp_cap */
-	__u32	max_send_wr;
-	__u32	max_recv_wr;
-	__u32	max_send_sge;
-	__u32	max_recv_sge;
-	__u32	max_inline_data;
-
-	__u16	pkey_index;
-	__u16	alt_pkey_index;
-	__u8	en_sqd_async_notify;
-	__u8	sq_draining;
-	__u8	max_rd_atomic;
-	__u8	max_dest_rd_atomic;
-	__u8	min_rnr_timer;
-	__u8	port_num;
-	__u8	timeout;
-	__u8	retry_cnt;
-	__u8	rnr_retry;
-	__u8	alt_port_num;
-	__u8	alt_timeout;
-};
-
 struct ib_ucm_listen {
 	__be64 service_id;
 	__be64 service_mask;
@@ -180,28 +128,6 @@ struct ib_ucm_private_data {
 	__u8  reserved[3];
 };
 
-struct ib_ucm_path_rec {
-	__u8  dgid[16];
-	__u8  sgid[16];
-	__be16 dlid;
-	__be16 slid;
-	__u32 raw_traffic;
-	__be32 flow_label;
-	__u32 reversible;
-	__u32 mtu;
-	__be16 pkey;
-	__u8  hop_limit;
-	__u8  traffic_class;
-	__u8  numb_path;
-	__u8  sl;
-	__u8  mtu_selector;
-	__u8  rate_selector;
-	__u8  rate;
-	__u8  packet_life_time_selector;
-	__u8  packet_life_time;
-	__u8  preference;
-};
-
 struct ib_ucm_req {
 	__u32 id;
 	__u32 qpn;
@@ -274,7 +200,7 @@ struct ib_ucm_sidr_req {
 	__be64 sid;
 	__u64 data;
 	__u64 path;
-	__u16 pkey;
+	__u16 reserved_pkey;
 	__u8  len;
 	__u8  max_cm_retries;
 	__u8  reserved[4];
@@ -304,8 +230,8 @@ struct ib_ucm_event_get {
 };
 
 struct ib_ucm_req_event_resp {
-	struct ib_ucm_path_rec primary_path;
-	struct ib_ucm_path_rec alternate_path;
+	struct ib_user_path_rec primary_path;
+	struct ib_user_path_rec alternate_path;
 	__be64                 remote_ca_guid;
 	__u32                  remote_qkey;
 	__u32                  remote_qpn;
@@ -349,7 +275,7 @@ struct ib_ucm_mra_event_resp {
 };
 
 struct ib_ucm_lap_event_resp {
-	struct ib_ucm_path_rec path;
+	struct ib_user_path_rec path;
 };
 
 struct ib_ucm_apr_event_resp {
diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h
new file mode 100644
index 0000000000000000000000000000000000000000..659120157e14b320c13c16e09fe2dc331db92514
--- /dev/null
+++ b/include/rdma/ib_user_sa.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_SA_H
+#define IB_USER_SA_H
+
+#include <linux/types.h>
+
+struct ib_user_path_rec {
+	__u8	dgid[16];
+	__u8	sgid[16];
+	__be16	dlid;
+	__be16	slid;
+	__u32	raw_traffic;
+	__be32	flow_label;
+	__u32	reversible;
+	__u32	mtu;
+	__be16	pkey;
+	__u8	hop_limit;
+	__u8	traffic_class;
+	__u8	numb_path;
+	__u8	sl;
+	__u8	mtu_selector;
+	__u8	rate_selector;
+	__u8	rate;
+	__u8	packet_life_time_selector;
+	__u8	packet_life_time;
+	__u8	preference;
+};
+
+#endif /* IB_USER_SA_H */
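
struct ib_user_path_rec mirrors the kernel's struct ib_sa_path_rec so path records can be reported across the user/kernel boundary through a stable ABI. The following is a sketch of the kind of copy-out this enables; the helper name is hypothetical, and the kernel-side field names are taken from struct ib_sa_path_rec in ib_sa.h:

#include <linux/string.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_user_sa.h>

/* Illustrative marshalling helper: copy a kernel SA path record into the
 * userspace ABI representation.  Byte order of the __be fields is preserved. */
static void copy_path_rec_to_user(struct ib_user_path_rec *dst,
				  struct ib_sa_path_rec *src)
{
	memcpy(dst->dgid, src->dgid.raw, sizeof dst->dgid);
	memcpy(dst->sgid, src->sgid.raw, sizeof dst->sgid);

	dst->dlid		= src->dlid;
	dst->slid		= src->slid;
	dst->raw_traffic	= src->raw_traffic;
	dst->flow_label		= src->flow_label;
	dst->reversible		= src->reversible;
	dst->mtu		= src->mtu;
	dst->pkey		= src->pkey;
	dst->hop_limit		= src->hop_limit;
	dst->traffic_class	= src->traffic_class;
	dst->numb_path		= src->numb_path;
	dst->sl			= src->sl;
	dst->mtu_selector	= src->mtu_selector;
	dst->rate_selector	= src->rate_selector;
	dst->rate		= src->rate;
	dst->packet_life_time_selector = src->packet_life_time_selector;
	dst->packet_life_time	= src->packet_life_time;
	dst->preference		= src->preference;
}
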
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 338ed433306381c543aa963058804caddb48d7ed..7b5372010f4b0a5168f5b7226389b990fa62aa92 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
+ * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_VERBS_H
@@ -323,6 +323,64 @@ struct ib_uverbs_destroy_cq_resp {
 	__u32 async_events_reported;
 };
 
+struct ib_uverbs_global_route {
+	__u8  dgid[16];
+	__u32 flow_label;
+	__u8  sgid_index;
+	__u8  hop_limit;
+	__u8  traffic_class;
+	__u8  reserved;
+};
+
+struct ib_uverbs_ah_attr {
+	struct ib_uverbs_global_route grh;
+	__u16 dlid;
+	__u8  sl;
+	__u8  src_path_bits;
+	__u8  static_rate;
+	__u8  is_global;
+	__u8  port_num;
+	__u8  reserved;
+};
+
+struct ib_uverbs_qp_attr {
+	__u32	qp_attr_mask;
+	__u32	qp_state;
+	__u32	cur_qp_state;
+	__u32	path_mtu;
+	__u32	path_mig_state;
+	__u32	qkey;
+	__u32	rq_psn;
+	__u32	sq_psn;
+	__u32	dest_qp_num;
+	__u32	qp_access_flags;
+
+	struct ib_uverbs_ah_attr ah_attr;
+	struct ib_uverbs_ah_attr alt_ah_attr;
+
+	/* ib_qp_cap */
+	__u32	max_send_wr;
+	__u32	max_recv_wr;
+	__u32	max_send_sge;
+	__u32	max_recv_sge;
+	__u32	max_inline_data;
+
+	__u16	pkey_index;
+	__u16	alt_pkey_index;
+	__u8	en_sqd_async_notify;
+	__u8	sq_draining;
+	__u8	max_rd_atomic;
+	__u8	max_dest_rd_atomic;
+	__u8	min_rnr_timer;
+	__u8	port_num;
+	__u8	timeout;
+	__u8	retry_cnt;
+	__u8	rnr_retry;
+	__u8	alt_port_num;
+	__u8	alt_timeout;
+	__u8	reserved[5];
+};
+
 struct ib_uverbs_create_qp {
 	__u64 response;
 	__u64 user_handle;
@@ -541,26 +599,6 @@ struct ib_uverbs_post_srq_recv_resp {
 	__u32 bad_wr;
 };
 
-struct ib_uverbs_global_route {
-	__u8  dgid[16];
-	__u32 flow_label;
-	__u8  sgid_index;
-	__u8  hop_limit;
-	__u8  traffic_class;
-	__u8  reserved;
-};
-
-struct ib_uverbs_ah_attr {
-	struct ib_uverbs_global_route grh;
-	__u16 dlid;
-	__u8  sl;
-	__u8  src_path_bits;
-	__u8  static_rate;
-	__u8  is_global;
-	__u8  port_num;
-	__u8  reserved;
-};
-
 struct ib_uverbs_create_ah {
 	__u64 response;
 	__u64 user_handle;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6bbf1b364400568b283215e37b6e9bec776dd84b..ee1f3a355666dbddaa388fc3d613bc329c24c9d3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -260,7 +260,8 @@ enum ib_event_type {
 	IB_EVENT_SM_CHANGE,
 	IB_EVENT_SRQ_ERR,
 	IB_EVENT_SRQ_LIMIT_REACHED,
-	IB_EVENT_QP_LAST_WQE_REACHED
+	IB_EVENT_QP_LAST_WQE_REACHED,
+	IB_EVENT_CLIENT_REREGISTER
 };
 
 struct ib_event {
@@ -696,8 +697,12 @@ struct ib_ucontext {
 struct ib_uobject {
 	u64			user_handle;	/* handle given to us by userspace */
 	struct ib_ucontext     *context;	/* associated user context */
+	void		       *object;		/* containing object */
 	struct list_head	list;		/* link to context's list */
 	u32			id;		/* index into kernel idr */
+	struct kref		ref;
+	struct rw_semaphore	mutex;		/* protects .live */
+	int			live;
 };
 
 struct ib_umem {
@@ -827,6 +832,7 @@ struct ib_cache {
 	struct ib_event_handler event_handler;
 	struct ib_pkey_cache  **pkey_cache;
 	struct ib_gid_cache   **gid_cache;
+	u8                     *lmc_cache;
 };
 
 struct ib_device {
@@ -1085,6 +1091,20 @@ int ib_dealloc_pd(struct ib_pd *pd);
  */
 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 
+/**
+ * ib_init_ah_from_wc - Initializes address handle attributes from a
+ *   work completion.
+ * @device: Device on which the received message arrived.
+ * @port_num: Port on which the received message arrived.
+ * @wc: Work completion associated with the received message.
+ * @grh: References the received global route header.  This parameter is
+ *   ignored unless the work completion indicates that the GRH is valid.
+ * @ah_attr: Returned attributes that can be used when creating an address
+ *   handle for replying to the message.
+ */
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
+		       struct ib_grh *grh, struct ib_ah_attr *ah_attr);
+
 /**
  * ib_create_ah_from_wc - Creates an address handle associated with the
  *   sender of the specified work completion.
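
ib_init_ah_from_wc exposes just the attribute-initialization step, so a caller can inspect or adjust the attributes before creating the handle; ib_create_ah_from_wc, documented immediately below it, combines both steps. A sketch of replying to a received UD datagram using the new call (the surrounding receive handling and the helper name are assumptions):

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/* Sketch: build an address handle for replying to the sender of a received
 * UD message.  'grh' points at the start of the receive buffer, which holds
 * the GRH when wc->wc_flags indicates one; ib_init_ah_from_wc ignores it
 * otherwise. */
static struct ib_ah *make_reply_ah(struct ib_device *device, u8 port_num,
				   struct ib_pd *pd, struct ib_wc *wc,
				   struct ib_grh *grh)
{
	struct ib_ah_attr ah_attr;
	int ret;

	ret = ib_init_ah_from_wc(device, port_num, wc, grh, &ah_attr);
	if (ret)
		return ERR_PTR(ret);

	/* Callers could tweak ah_attr here (e.g. static_rate) before use. */
	return ib_create_ah(pd, &ah_attr);
}
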
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
new file mode 100644
index 0000000000000000000000000000000000000000..402c63d7226ba79cba167e8e9a7e8c8f0b6fafa2
--- /dev/null
+++ b/include/rdma/rdma_cm.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(RDMA_CM_H)
+#define RDMA_CM_H
+
+#include <linux/socket.h>
+#include <linux/in6.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
+
+/*
+ * Upon receiving a device removal event, users must destroy the associated
+ * RDMA identifier and release all resources allocated with the device.
+ */
+enum rdma_cm_event_type {
+	RDMA_CM_EVENT_ADDR_RESOLVED,
+	RDMA_CM_EVENT_ADDR_ERROR,
+	RDMA_CM_EVENT_ROUTE_RESOLVED,
+	RDMA_CM_EVENT_ROUTE_ERROR,
+	RDMA_CM_EVENT_CONNECT_REQUEST,
+	RDMA_CM_EVENT_CONNECT_RESPONSE,
+	RDMA_CM_EVENT_CONNECT_ERROR,
+	RDMA_CM_EVENT_UNREACHABLE,
+	RDMA_CM_EVENT_REJECTED,
+	RDMA_CM_EVENT_ESTABLISHED,
+	RDMA_CM_EVENT_DISCONNECTED,
+	RDMA_CM_EVENT_DEVICE_REMOVAL,
+};
+
+enum rdma_port_space {
+	RDMA_PS_SDP  = 0x0001,
+	RDMA_PS_TCP  = 0x0106,
+	RDMA_PS_UDP  = 0x0111,
+	RDMA_PS_SCTP = 0x0183
+};
+
+struct rdma_addr {
+	struct sockaddr src_addr;
+	u8		src_pad[sizeof(struct sockaddr_in6) -
+				sizeof(struct sockaddr)];
+	struct sockaddr dst_addr;
+	u8		dst_pad[sizeof(struct sockaddr_in6) -
+				sizeof(struct sockaddr)];
+	struct rdma_dev_addr dev_addr;
+};
+
+struct rdma_route {
+	struct rdma_addr addr;
+	struct ib_sa_path_rec *path_rec;
+	int num_paths;
+};
+
+struct rdma_cm_event {
+	enum rdma_cm_event_type	 event;
+	int			 status;
+	void			*private_data;
+	u8			 private_data_len;
+};
+
+struct rdma_cm_id;
+
+/**
+ * rdma_cm_event_handler - Callback used to report user events.
+ *
+ * Notes: Users may not call rdma_destroy_id from this callback to destroy
+ *   the passed in id, or a corresponding listen id.  Returning a
+ *   non-zero value from the callback will destroy the passed in id.
+ */
+typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
+				     struct rdma_cm_event *event);
+
+struct rdma_cm_id {
+	struct ib_device	*device;
+	void			*context;
+	struct ib_qp		*qp;
+	rdma_cm_event_handler	 event_handler;
+	struct rdma_route	 route;
+	enum rdma_port_space	 ps;
+	u8			 port_num;
+};
+
+/**
+ * rdma_create_id - Create an RDMA identifier.
+ *
+ * @event_handler: User callback invoked to report events associated with the
+ *   returned rdma_id.
+ * @context: User specified context associated with the id.
+ * @ps: RDMA port space.
+ */
+struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+				  void *context, enum rdma_port_space ps);
+
+void rdma_destroy_id(struct rdma_cm_id *id);
+
+/**
+ * rdma_bind_addr - Bind an RDMA identifier to a source address and
+ *   associated RDMA device, if needed.
+ *
+ * @id: RDMA identifier.
+ * @addr: Local address information.  Wildcard values are permitted.
+ *
+ * This associates a source address with the RDMA identifier before calling
+ * rdma_listen.  If a specific local address is given, the RDMA identifier will
+ * be bound to a local RDMA device.
+ */
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_resolve_addr - Resolve destination and optional source addresses
+ *   from IP addresses to an RDMA address.  If successful, the specified
+ *   rdma_cm_id will be bound to a local device.
+ *
+ * @id: RDMA identifier.
+ * @src_addr: Source address information.  This parameter may be NULL.
+ * @dst_addr: Destination address information.
+ * @timeout_ms: Time to wait for resolution to complete.
+ */
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+		      struct sockaddr *dst_addr, int timeout_ms);
+
+/**
+ * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
+ *   into route information needed to establish a connection.
+ *
+ * This is called on the client side of a connection.
+ * Users must have first called rdma_resolve_addr to resolve a dst_addr
+ * into an RDMA address before calling this routine.
+ */
+int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+
+/**
+ * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
+ * identifier.
+ *
+ * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA
+ * through their states.
+ */
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+		   struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA
+ * identifier.
+ *
+ * Users must destroy any QP associated with an RDMA identifier before
+ * destroying the RDMA ID.
+ */
+void rdma_destroy_qp(struct rdma_cm_id *id);
+
+/**
+ * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning
+ *   to a specified QP state.
+ * @id: Communication identifier associated with the QP attributes to
+ *   initialize.
+ * @qp_attr: On input, specifies the desired QP state.  On output, the
+ *   mandatory and desired optional attributes will be set in order to
+ *   modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ *   QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state.  This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes.  Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ *
+ * Users that wish to have their QP automatically transitioned through its
+ * states can associate a QP with the rdma_cm_id by calling rdma_create_qp().
+ */
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+struct rdma_conn_param {
+	const void *private_data;
+	u8 private_data_len;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 flow_control;
+	u8 retry_count;		/* ignored when accepting */
+	u8 rnr_retry_count;
+	/* Fields below ignored if a QP is created on the rdma_cm_id. */
+	u8 srq;
+	u32 qp_num;
+	enum ib_qp_type qp_type;
+};
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with
+ * by having called rdma_resolve_route before calling this routine.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_listen - This function is called by the passive side to
+ *   listen for incoming connection requests.
+ *
+ * Users must have bound the rdma_cm_id to a local address by calling
+ * rdma_bind_addr before calling this routine.
+ */
+int rdma_listen(struct rdma_cm_id *id, int backlog);
+
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection.  This must be
+ *   provided if accepting a connection request.  If accepting a connection
+ *   response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request.  It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_reject - Called to reject a connection request or response.
+ */
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+		u8 private_data_len);
+
+/**
+ * rdma_disconnect - This function disconnects the associated QP and
+ *   transitions it into the error state.
+ */
+int rdma_disconnect(struct rdma_cm_id *id);
+
+#endif /* RDMA_CM_H */
+
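
Taken together, these calls give a kernel consumer a socket-like, transport-neutral way to set up a connection. Below is a condensed, illustrative sketch of the active (client) side, driving the id through address and route resolution from the event handler and connecting once a QP is attached; error handling and the ULP-specific pieces are omitted, and names such as my_conn are placeholders:

#include <linux/err.h>
#include <linux/errno.h>
#include <rdma/rdma_cm.h>

/* Placeholder connection state for the sketch; a real consumer would carry
 * its own context here. */
struct my_conn {
	struct rdma_cm_id	*id;
	struct ib_pd		*pd;		/* allocated elsewhere */
	struct ib_qp_init_attr	 init_attr;	/* filled in elsewhere */
	struct rdma_conn_param	 param;		/* filled in elsewhere */
};

/* Each step of the active-side setup is kicked off by the event that
 * completes the previous one.  A non-zero return destroys the id. */
static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct my_conn *conn = id->context;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		return rdma_resolve_route(id, 2000);
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		if (rdma_create_qp(id, conn->pd, &conn->init_attr))
			return -ENOMEM;
		return rdma_connect(id, &conn->param);
	case RDMA_CM_EVENT_ESTABLISHED:
		/* id->qp is now ready for posting work requests */
		return 0;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		/* event->status carries the failure reason */
		return 0;
	default:
		return 0;
	}
}

static int my_connect(struct my_conn *conn, struct sockaddr *dst)
{
	conn->id = rdma_create_id(my_cm_handler, conn, RDMA_PS_TCP);
	if (IS_ERR(conn->id))
		return PTR_ERR(conn->id);

	/* Starts the ADDR_RESOLVED -> ROUTE_RESOLVED -> ESTABLISHED chain. */
	return rdma_resolve_addr(conn->id, NULL, dst, 2000);
}

The passive side is symmetric: rdma_create_id, rdma_bind_addr, rdma_listen, and then rdma_accept on the id handed to the RDMA_CM_EVENT_CONNECT_REQUEST handler.
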
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
new file mode 100644
index 0000000000000000000000000000000000000000..e8c3af1804d469e84053d9232897296fca587f65
--- /dev/null
+++ b/include/rdma/rdma_cm_ib.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(RDMA_CM_IB_H)
+#define RDMA_CM_IB_H
+
+#include <rdma/rdma_cm.h>
+
+/**
+ * rdma_set_ib_paths - Manually sets the path records used to establish a
+ *   connection.
+ * @id: Connection identifier associated with the request.
+ * @path_rec: Reference to the path record(s) to be used for the connection.
+ *
+ * This call permits a user to specify routing information for rdma_cm_id's
+ * bound to InfiniBand devices.  It is called on the client side of a
+ * connection and replaces the call to rdma_resolve_route.
+ */
+int rdma_set_ib_paths(struct rdma_cm_id *id,
+		      struct ib_sa_path_rec *path_rec, int num_paths);
+
+#endif /* RDMA_CM_IB_H */
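
For IB-bound ids whose path is already known, for instance from an earlier SA query or an out-of-band exchange, this call takes the place of rdma_resolve_route in the active-side flow sketched above. A minimal illustration, with the helper name being hypothetical:

#include <rdma/rdma_cm_ib.h>

/* Sketch: supply one previously obtained path record instead of letting the
 * CMA resolve the route itself. */
static int use_known_path(struct rdma_cm_id *id, struct ib_sa_path_rec *rec)
{
	return rdma_set_ib_paths(id, rec, 1);
}
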
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index 637f77eccf0c77d491a6c6a4e7d6b54ed115f645..ad178fa78f665a69fba3b4d26e66c9aee9443d75 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -87,6 +87,11 @@ enum srp_login_rej_reason {
 	SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED		= 0x00010006
 };
 
+enum {
+	SRP_REV10_IB_IO_CLASS	= 0xff00,
+	SRP_REV16A_IB_IO_CLASS	= 0x0100
+};
+
 struct srp_direct_buf {
 	__be64	va;
 	__be32	key;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cdde963909603aae61b7398afb09112dde407e52..31387abf53a21630ebbd9c2cc9993cfa715a2ade 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -666,3 +666,4 @@ void __init ip_fib_init(void)
 }
 
 EXPORT_SYMBOL(inet_addr_type);
+EXPORT_SYMBOL(ip_dev_find);
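
Exporting ip_dev_find lets a module translate a local IPv4 address into the net_device it is configured on, and from there to that interface's hardware address. A sketch of the pattern follows; it assumes the prototype of this era, which takes only the address (historically spelled u32 rather than __be32), returns a held reference, and is declared via inetdevice.h:

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>

/* Sketch: map a local IPv4 address to its interface's link-layer address
 * (20 bytes for an IPoIB interface).  'hw' must hold at least MAX_ADDR_LEN. */
static int local_ip_to_hw_addr(__be32 ip, unsigned char *hw, int *hw_len)
{
	struct net_device *dev;

	dev = ip_dev_find(ip);		/* holds a reference on success */
	if (!dev)
		return -EADDRNOTAVAIL;

	memcpy(hw, dev->dev_addr, dev->addr_len);
	*hw_len = dev->addr_len;

	dev_put(dev);			/* release the ip_dev_find reference */
	return 0;
}
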