uverbs_main.c 23.7 KB
Newer Older
1
2
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/mount.h>
47
#include <linux/cdev.h>
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

#include <asm/uaccess.h>

#include "uverbs.h"

/* Module metadata. */
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

/* Magic number handed to get_sb_pseudo() for the event pseudo-filesystem. */
#define INFINIBANDEVENTFS_MAGIC	0x49426576	/* "IBev" */

/* Character device numbering used for the uverbs device nodes. */
enum {
	IB_UVERBS_MAJOR       = 231,	/* char device major number */
	IB_UVERBS_BASE_MINOR  = 192,	/* first minor of the registered region */
	IB_UVERBS_MAX_DEVICES = 32	/* size of the region, dev_table and dev_map */
};

/* First dev_t of the region; device N is added at IB_UVERBS_BASE_DEV + N. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

67
68
static struct class *uverbs_class;

69
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
70
71
72
73
74
75
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
76
DEFINE_IDR(ib_uverbs_srq_idr);
77
78

static spinlock_t map_lock;
79
static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
80
81
82
83
84
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);

/*
 * Command dispatch table, indexed by the IB_USER_VERBS_CMD_* value found
 * in the command header.  Entries without an initializer are NULL and
 * are rejected by ib_uverbs_write().
 */
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     const char __user *buf, int in_len,
				     int out_len) = {
	/* context and device queries */
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,

	/* protection domains and memory regions */
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,

	/* completion channels and completion queues */
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL]	= ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,

	/* queue pairs */
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,

	/* work request posting */
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,

	/* address handles and multicast */
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,

	/* shared receive queues */
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
};

/*
 * Kernel-internal mount of the event pseudo-filesystem; set up by
 * kern_mount() in ib_uverbs_init() and used as the mount/dentry for
 * every event file created by ib_uverbs_alloc_event_file().
 */
static struct vfsmount *uverbs_event_mnt;

/* Forward declarations: referenced by uverbs_client before being defined. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);

120
121
122
123
124
static void ib_uverbs_release_dev(struct kref *ref)
{
	struct ib_uverbs_device *dev =
		container_of(ref, struct ib_uverbs_device, ref);

125
	complete(&dev->comp);
126
127
}

128
129
130
131
132
133
134
135
/* kref release callback: frees the event file that embeds @ref. */
static void ib_uverbs_release_event_file(struct kref *ref)
{
	kfree(container_of(ref, struct ib_uverbs_event_file, ref));
}

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
 * Discard every event still queued on behalf of a CQ object.
 *
 * @file:    uverbs file whose async event file may hold async events
 *           for the CQ
 * @ev_file: completion event file the CQ was bound to, or NULL
 * @uobj:    the CQ user object being released
 *
 * Each pending event is unlinked from the owning event file's queue
 * (via its 'list' member) and freed.  When @ev_file is non-NULL one
 * reference on it is dropped as well.
 */
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *ev, *next;

	/* Completion events, guarded by the completion file's lock. */
	if (ev_file) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(ev, next, &uobj->comp_list, obj_list) {
			list_del(&ev->list);
			kfree(ev);
		}
		spin_unlock_irq(&ev_file->lock);

		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	}

	/* Async events, guarded by the async file's lock. */
	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(ev, next, &uobj->async_list, obj_list) {
		list_del(&ev->list);
		kfree(ev);
	}
	spin_unlock_irq(&file->async_file->lock);
}

/*
 * Discard the async events still queued for an event-carrying object.
 *
 * Walks @uobj->event_list under the async file's lock, unlinks each
 * event from the async file's queue and frees it.
 */
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *ev, *next;

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(ev, next, &uobj->event_list, obj_list) {
		list_del(&ev->list);
		kfree(ev);
	}
	spin_unlock_irq(&file->async_file->lock);
}

174
175
176
177
178
179
180
181
182
183
184
185
/*
 * Detach @qp from every multicast group recorded on @uobj->mcast_list
 * and free the bookkeeping entries.
 */
static void ib_uverbs_detach_umcast(struct ib_qp *qp,
				    struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *entry, *next;

	list_for_each_entry_safe(entry, next, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &entry->gid, entry->lid);
		list_del(&entry->list);
		kfree(entry);
	}
}

186
187
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
188
189
190
191
192
193
{
	struct ib_uobject *uobj, *tmp;

	if (!context)
		return 0;

194
195
	context->closing = 1;

196
	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
197
198
199
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
200
201
202
		ib_destroy_ah(ah);
		kfree(uobj);
	}
203
204

	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
205
		struct ib_qp *qp = uobj->object;
206
207
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);
208
209

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
210
		ib_uverbs_detach_umcast(qp, uqp);
211
		ib_destroy_qp(qp);
212
213
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
214
215
216
	}

	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
217
		struct ib_cq *cq = uobj->object;
218
219
220
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);
221
222

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
223
		ib_destroy_cq(cq);
224
225
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
226
227
	}

228
	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
229
		struct ib_srq *srq = uobj->object;
230
231
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);
232
233

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
234
		ib_destroy_srq(srq);
235
236
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
237
238
	}

239
240
241
	/* XXX Free MWs */

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
242
		struct ib_mr *mr = uobj->object;
243

244
		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
245
		ib_dereg_mr(mr);
246
		kfree(uobj);
247
248
249
	}

	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
250
251
252
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
253
254
255
256
257
258
259
260
261
262
263
264
265
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

	return context->device->dealloc_ucontext(context);
}

static void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);

	module_put(file->device->ib_dev->owner);
266
267
	kref_put(&file->device->ref, ib_uverbs_release_dev);

268
269
270
271
272
273
274
	kfree(file);
}

/*
 * read() handler for event files: hand exactly one queued event
 * descriptor to userspace.
 *
 * Blocks until an event is available unless the file is O_NONBLOCK, in
 * which case -EAGAIN is returned.  Returns -ERESTARTSYS if the wait is
 * interrupted, -EINVAL if @count is too small for one descriptor (the
 * event stays queued), -EFAULT if the copy to userspace fails (the
 * event has already been dequeued and is freed), otherwise the
 * descriptor size.
 */
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
{
	struct ib_uverbs_event_file *ev_file = filp->private_data;
	struct ib_uverbs_event *ev;
	int len;
	int rc = 0;

	spin_lock_irq(&ev_file->lock);

	for (;;) {
		if (!list_empty(&ev_file->event_list))
			break;

		/* Nothing queued: drop the lock before sleeping/returning. */
		spin_unlock_irq(&ev_file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_file->poll_wait,
					     !list_empty(&ev_file->event_list)))
			return -ERESTARTSYS;

		/* Re-check under the lock; the queue may have been drained. */
		spin_lock_irq(&ev_file->lock);
	}

	ev = list_entry(ev_file->event_list.next, struct ib_uverbs_event, list);

	len = ev_file->is_async ? sizeof (struct ib_uverbs_async_event_desc)
				: sizeof (struct ib_uverbs_comp_event_desc);

	if (len > count) {
		rc = -EINVAL;
		ev = NULL;
	} else {
		list_del(&ev->list);
		if (ev->counter) {
			/* Count the event as reported and unlink it from its object. */
			++(*ev->counter);
			list_del(&ev->obj_list);
		}
	}

	spin_unlock_irq(&ev_file->lock);

	if (ev)
		rc = copy_to_user(buf, ev, len) ? -EFAULT : len;

	kfree(ev);

	return rc;
}

static unsigned int ib_uverbs_event_poll(struct file *filp,
					 struct poll_table_struct *wait)
{
	unsigned int pollflags = 0;
	struct ib_uverbs_event_file *file = filp->private_data;

	poll_wait(filp, &file->poll_wait, wait);

	spin_lock_irq(&file->lock);
335
	if (!list_empty(&file->event_list))
336
337
338
339
340
341
		pollflags = POLLIN | POLLRDNORM;
	spin_unlock_irq(&file->lock);

	return pollflags;
}

342
343
344
345
346
347
348
/*
 * fasync() handler for event files: forwards to fasync_helper() using
 * the event file's async_queue.
 */
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_file *ev_file = filp->private_data;
	struct fasync_struct **queue = &ev_file->async_queue;

	return fasync_helper(fd, filp, on, queue);
}

349
350
351
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_event_file *file = filp->private_data;
352
353
354
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->lock);
355
	file->is_closed = 1;
356
357
358
359
360
361
	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->lock);
362

363
	ib_uverbs_event_fasync(-1, filp, 0);
364
365
366
367
368
369

	if (file->is_async) {
		ib_unregister_event_handler(&file->uverbs_file->event_handler);
		kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
	}
	kref_put(&file->ref, ib_uverbs_release_event_file);
370
371
372
373

	return 0;
}

374
static const struct file_operations uverbs_event_fops = {
375
	.owner	 = THIS_MODULE,
376
377
	.read 	 = ib_uverbs_event_read,
	.poll    = ib_uverbs_event_poll,
378
379
	.release = ib_uverbs_event_close,
	.fasync  = ib_uverbs_event_fasync
380
381
382
383
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
384
385
386
387
388
389
390
391
392
	struct ib_uverbs_event_file    *file = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	if (!file)
		return;

	spin_lock_irqsave(&file->lock, flags);
393
	if (file->is_closed) {
394
395
396
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}
397
398

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
399
400
	if (!entry) {
		spin_unlock_irqrestore(&file->lock, flags);
401
		return;
402
	}
403

404
405
406
407
	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;
408

409
	list_add_tail(&entry->list, &file->event_list);
410
	list_add_tail(&entry->obj_list, &uobj->comp_list);
411
	spin_unlock_irqrestore(&file->lock, flags);
412

413
414
	wake_up_interruptible(&file->poll_wait);
	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
415
416
417
}

/*
 * Queue one asynchronous event on @file's async event file and wake up
 * any reader.
 *
 * @file:     uverbs file whose async event file receives the event
 * @element:  value stored in the descriptor's 'element' field
 * @event:    value stored in the descriptor's 'event_type' field
 * @obj_list: per-object list the event is additionally linked on, or
 *            NULL if the event does not belong to an object
 * @counter:  per-object "events reported" counter stored in the event,
 *            or NULL
 *
 * The event is dropped when the async event file has already been
 * closed (nobody can read it any more, and ib_uverbs_event_close() has
 * already emptied the queue) or when the atomic allocation fails.
 */
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);

	/*
	 * Only queue while the file is still open.  The test used to be
	 * inverted, which dropped every event for an open file and queued
	 * events only after close.
	 */
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
452
453
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);
454

455
	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
456
457
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
458
459
460
461
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
462
463
464
465
466
467
468
469
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
470
471
}

472
473
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
474
475
476
477
478
479
480
481
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
482
483
}

484
485
void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
486
487
488
489
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

490
491
	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
492
493
}

494
495
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
					int is_async, int *fd)
496
{
497
	struct ib_uverbs_event_file *ev_file;
498
	struct file *filp;
499
	int ret;
500

501
502
503
504
505
506
507
508
509
510
511
	ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	kref_init(&ev_file->ref);
	spin_lock_init(&ev_file->lock);
	INIT_LIST_HEAD(&ev_file->event_list);
	init_waitqueue_head(&ev_file->poll_wait);
	ev_file->uverbs_file = uverbs_file;
	ev_file->async_queue = NULL;
	ev_file->is_async    = is_async;
512
	ev_file->is_closed   = 0;
513
514
515
516
517
518

	*fd = get_unused_fd();
	if (*fd < 0) {
		ret = *fd;
		goto err;
	}
519
520
521

	filp = get_empty_filp();
	if (!filp) {
522
523
		ret = -ENFILE;
		goto err_fd;
524
525
	}

526
527
528
529
530
531
	/*
	 * fops_get() can't fail here, because we're coming from a
	 * system call on a uverbs file, which will already have a
	 * module reference.
	 */
	filp->f_op 	   = fops_get(&uverbs_event_fops);
532
533
534
	filp->f_path.mnt 	   = mntget(uverbs_event_mnt);
	filp->f_path.dentry 	   = dget(uverbs_event_mnt->mnt_root);
	filp->f_mapping    = filp->f_path.dentry->d_inode->i_mapping;
535
536
	filp->f_flags      = O_RDONLY;
	filp->f_mode       = FMODE_READ;
537
	filp->private_data = ev_file;
538

539
	return filp;
540

541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
err_fd:
	put_unused_fd(*fd);

err:
	kfree(ev_file);
	return ERR_PTR(ret);
}

/*
 * Resolve a file descriptor to a completion event file.
 *
 * On success a reference is taken on the returned event file struct.
 * NULL is returned when @fd is not open, does not refer to an event
 * file created by this module, or refers to the async event file.
 */
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
	struct ib_uverbs_event_file *found = NULL;
	struct file *filp;

	filp = fget(fd);
	if (!filp)
		return NULL;

	/* private_data may only be interpreted once the fops match. */
	if (filp->f_op == &uverbs_event_fops) {
		struct ib_uverbs_event_file *candidate = filp->private_data;

		if (!candidate->is_async) {
			kref_get(&candidate->ref);
			found = candidate;
		}
	}

	fput(filp);
	return found;
}

/*
 * write() handler for uverbs device files: validate the command header
 * and dispatch to the matching handler in uverbs_cmd_table.
 *
 * Returns -EINVAL if the write is shorter than a header, if the
 * header's in_words does not match @count, if the command is out of
 * range, has no handler, or is not enabled in the device's
 * uverbs_cmd_mask, or if no context exists yet and the command is not
 * GET_CONTEXT.  Returns -EFAULT if the header cannot be copied in.
 * Otherwise returns the handler's result.
 */
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_cmd_hdr hdr;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	/* in_words counts 4-byte words and must cover the whole write. */
	if (hdr.in_words * 4 != count)
		return -EINVAL;

	/* Bounds check first, so the table and mask lookups below are safe. */
	if (hdr.command < 0 ||
	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
		return -EINVAL;

	if (!uverbs_cmd_table[hdr.command])
		return -EINVAL;

	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
		return -EINVAL;

	/* Until a context exists, only GET_CONTEXT is accepted. */
	if (!file->ucontext &&
	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
		return -EINVAL;

	return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
					     hdr.in_words * 4, hdr.out_words * 4);
}

/*
 * mmap() handler: delegate to the IB device's mmap method.  Fails with
 * -ENODEV until a user context has been created on this file.
 */
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;

	if (file->ucontext)
		return file->device->ib_dev->mmap(file->ucontext, vma);

	return -ENODEV;
}

static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
620
	struct ib_uverbs_device *dev;
621
	struct ib_uverbs_file *file;
622
	int ret;
623

624
625
626
627
628
629
630
631
632
633
634
635
636
	spin_lock(&map_lock);
	dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
	if (dev)
		kref_get(&dev->ref);
	spin_unlock(&map_lock);

	if (!dev)
		return -ENXIO;

	if (!try_module_get(dev->ib_dev->owner)) {
		ret = -ENODEV;
		goto err;
	}
637

638
	file = kmalloc(sizeof *file, GFP_KERNEL);
639
	if (!file) {
640
641
		ret = -ENOMEM;
		goto err_module;
642
	}
643

644
645
646
	file->device	 = dev;
	file->ucontext	 = NULL;
	file->async_file = NULL;
647
	kref_init(&file->ref);
648
	mutex_init(&file->mutex);
649
650
651
652

	filp->private_data = file;

	return 0;
653
654
655
656
657
658
659
660

err_module:
	module_put(dev->ib_dev->owner);

err:
	kref_put(&dev->ref, ib_uverbs_release_dev);

	return ret;
661
662
663
664
665
666
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

667
668
669
670
	ib_uverbs_cleanup_ucontext(file, file->ucontext);

	if (file->async_file)
		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
671
672
673
674
675
676

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

677
static const struct file_operations uverbs_fops = {
678
679
680
681
682
683
	.owner 	 = THIS_MODULE,
	.write 	 = ib_uverbs_write,
	.open 	 = ib_uverbs_open,
	.release = ib_uverbs_close
};

684
static const struct file_operations uverbs_mmap_fops = {
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
	.owner 	 = THIS_MODULE,
	.write 	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open 	 = ib_uverbs_open,
	.release = ib_uverbs_close
};

/*
 * IB client registered in ib_uverbs_init(); its callbacks create and
 * destroy the uverbs device for an IB device.  Also used as the key for
 * ib_set_client_data()/ib_get_client_data().
 */
static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
700
701
702
703
	struct ib_uverbs_device *dev = class_get_devdata(class_dev);

	if (!dev)
		return -ENODEV;
704
705
706
707
708

	return sprintf(buf, "%s\n", dev->ib_dev->name);
}
static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

709
710
static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
{
711
712
713
714
	struct ib_uverbs_device *dev = class_get_devdata(class_dev);

	if (!dev)
		return -ENODEV;
715
716
717
718
719

	return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
}
static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);

720
721
722
723
724
725
726
727
728
729
730
731
732
/*
 * Class-level "abi_version" attribute: prints IB_USER_VERBS_ABI_VERSION.
 * Distinct from the per-device attribute of the same name, which
 * reports the device's own uverbs_abi_ver.
 */
static ssize_t show_abi_version(struct class *class, char *buf)
{
	return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static void ib_uverbs_add_one(struct ib_device *device)
{
	struct ib_uverbs_device *uverbs_dev;

	if (!device->alloc_ucontext)
		return;

Roland Dreier's avatar
Roland Dreier committed
733
	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
734
735
736
	if (!uverbs_dev)
		return;

737
	kref_init(&uverbs_dev->ref);
738
	init_completion(&uverbs_dev->comp);
739

740
741
742
743
744
745
746
747
748
	spin_lock(&map_lock);
	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
		spin_unlock(&map_lock);
		goto err;
	}
	set_bit(uverbs_dev->devnum, dev_map);
	spin_unlock(&map_lock);

749
	uverbs_dev->ib_dev           = device;
750
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
751

752
753
	uverbs_dev->dev = cdev_alloc();
	if (!uverbs_dev->dev)
754
		goto err;
755
756
757
758
759
	uverbs_dev->dev->owner = THIS_MODULE;
	uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
	kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum);
	if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
		goto err_cdev;
760

761
762
	uverbs_dev->class_dev = class_device_create(uverbs_class, NULL,
						    uverbs_dev->dev->dev,
763
764
765
						    device->dma_device,
						    "uverbs%d", uverbs_dev->devnum);
	if (IS_ERR(uverbs_dev->class_dev))
766
767
		goto err_cdev;

768
769
770
	class_set_devdata(uverbs_dev->class_dev, uverbs_dev);

	if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev))
771
		goto err_class;
772
	if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version))
773
		goto err_class;
774

775
776
777
778
	spin_lock(&map_lock);
	dev_table[uverbs_dev->devnum] = uverbs_dev;
	spin_unlock(&map_lock);

779
780
781
782
783
	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
784
	class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
785
786

err_cdev:
787
	cdev_del(uverbs_dev->dev);
788
789
790
	clear_bit(uverbs_dev->devnum, dev_map);

err:
791
	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
792
793
	wait_for_completion(&uverbs_dev->comp);
	kfree(uverbs_dev);
794
795
796
797
798
799
800
801
802
803
	return;
}

static void ib_uverbs_remove_one(struct ib_device *device)
{
	struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);

	if (!uverbs_dev)
		return;

804
805
806
807
808
809
810
811
812
	class_set_devdata(uverbs_dev->class_dev, NULL);
	class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
	cdev_del(uverbs_dev->dev);

	spin_lock(&map_lock);
	dev_table[uverbs_dev->devnum] = NULL;
	spin_unlock(&map_lock);

	clear_bit(uverbs_dev->devnum, dev_map);
813

814
	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
815
816
	wait_for_completion(&uverbs_dev->comp);
	kfree(uverbs_dev);
817
818
}

819
820
821
static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
			       const char *dev_name, void *data,
			       struct vfsmount *mnt)
822
823
{
	return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
824
			     INFINIBANDEVENTFS_MAGIC, mnt);
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
}

/*
 * Pseudo-filesystem backing the event files.  It is registered and
 * kern_mount()ed in ib_uverbs_init() and unregistered in
 * ib_uverbs_cleanup().
 */
static struct file_system_type uverbs_event_fs = {
	/* No owner field so module can be unloaded */
	.name    = "infinibandeventfs",
	.get_sb  = uverbs_event_get_sb,
	.kill_sb = kill_litter_super
};

/*
 * Module init.  Sets up, in order: the char device number region, the
 * "infiniband_verbs" class and its abi_version attribute, the event
 * pseudo-filesystem and its kernel mount, and finally the IB client
 * registration.  Each failure unwinds the steps already completed and
 * returns the error code.
 */
static int __init ib_uverbs_init(void)
{
	int err;

	spin_lock_init(&map_lock);

	err = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
				     "infiniband_verbs");
	if (err) {
		printk(KERN_ERR "user_verbs: couldn't register device number\n");
		goto out;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		err = PTR_ERR(uverbs_class);
		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	err = class_create_file(uverbs_class, &class_attr_abi_version);
	if (err) {
		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	err = register_filesystem(&uverbs_event_fs);
	if (err) {
		printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
		goto out_class;
	}

	uverbs_event_mnt = kern_mount(&uverbs_event_fs);
	if (IS_ERR(uverbs_event_mnt)) {
		err = PTR_ERR(uverbs_event_mnt);
		printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
		goto out_fs;
	}

	/* Last step: from here on ib_uverbs_add_one() may be called. */
	err = ib_register_client(&uverbs_client);
	if (err) {
		printk(KERN_ERR "user_verbs: couldn't register client\n");
		goto out_mnt;
	}

	return 0;

	/* Unwind in reverse order of setup. */
out_mnt:
	mntput(uverbs_event_mnt);

out_fs:
	unregister_filesystem(&uverbs_event_fs);

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);

out:
	return err;
}

/*
 * Module exit: undo ib_uverbs_init() in reverse order, then destroy
 * all of the handle IDRs.
 */
static void __exit ib_uverbs_cleanup(void)
{
	/* Same order as the individual idr_destroy() calls this replaces. */
	struct idr *const idrs[] = {
		&ib_uverbs_pd_idr,
		&ib_uverbs_mr_idr,
		&ib_uverbs_mw_idr,
		&ib_uverbs_ah_idr,
		&ib_uverbs_cq_idr,
		&ib_uverbs_qp_idr,
		&ib_uverbs_srq_idr,
	};
	int i;

	ib_unregister_client(&uverbs_client);
	mntput(uverbs_event_mnt);
	unregister_filesystem(&uverbs_event_fs);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);

	for (i = 0; i < ARRAY_SIZE(idrs); i++)
		idr_destroy(idrs[i]);
}

/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);