/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/slab.h>

#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#include "hyperv_net.h"

struct net_device_context {
	/* point back to our device context */
	struct hv_device *device_ctx;
	struct delayed_work dwork;
};

static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
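
/*
 * Load-time tuning sketch (the module name and value here are
 * illustrative, not taken from this file):
 *
 *   modprobe hv_netvsc ring_size=256
 *
 * ring_size is read-only after load (S_IRUGO) and is passed to the
 * host via netvsc_device_info in netvsc_probe()/netvsc_change_mtu().
 */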

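/*
 * ndo_set_rx_mode is called in atomic context, while reprogramming the
 * RNDIS packet filter involves a message exchange with the host that
 * may sleep.  The update is therefore deferred to a work item, which
 * frees itself once the filter has been set.
 */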
struct set_multicast_work {
	struct work_struct work;
	struct net_device *net;
};

static void do_set_multicast(struct work_struct *w)
{
	struct set_multicast_work *swk =
		container_of(w, struct set_multicast_work, work);
	struct net_device *net = swk->net;

	struct net_device_context *ndevctx = netdev_priv(net);
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;

	nvdev = hv_get_drvdata(ndevctx->device_ctx);
	if (nvdev == NULL)
		goto out;

	rdev = nvdev->extension;
	if (rdev == NULL)
		goto out;

	if (net->flags & IFF_PROMISC)
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_PROMISCUOUS);
	else
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_BROADCAST |
			NDIS_PACKET_TYPE_ALL_MULTICAST |
			NDIS_PACKET_TYPE_DIRECTED);

out:
	kfree(w);
}

static void netvsc_set_multicast_list(struct net_device *net)
{
	struct set_multicast_work *swk =
		kmalloc(sizeof(struct set_multicast_work), GFP_ATOMIC);
	if (swk == NULL)
		return;

	swk->net = net;
	INIT_WORK(&swk->work, do_set_multicast);
	schedule_work(&swk->work);
}

static int netvsc_open(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	int ret = 0;

	/* Open up the device */
	ret = rndis_filter_open(device_obj);
	if (ret != 0) {
		netdev_err(net, "unable to open device (ret %d).\n", ret);
		return ret;
	}

	netif_start_queue(net);

	return ret;
}

static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	int ret;

	netif_stop_queue(net);

	ret = rndis_filter_close(device_obj);
	if (ret != 0)
		netdev_err(net, "unable to close device (ret %d).\n", ret);

	return ret;
}

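/*
 * Send-completion callback: netvsc_start_xmit() stashes the skb pointer
 * in send_completion_tid, so both the packet bookkeeping and the skb
 * itself can be released here.
 */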
static void netvsc_xmit_completion(void *context)
{
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
	struct sk_buff *skb = (struct sk_buff *)
		(unsigned long)packet->completion.send.send_completion_tid;

	kfree(packet);

	if (skb)
		dev_kfree_skb_any(skb);
}

static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet;
	int ret;
	unsigned int i, num_pages, npg_data;

	/* Add multipage for skb->data and additional one for RNDIS */
	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1;
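	/*
	 * npg_data is the number of pages spanned by the linear skb->data
	 * area (last page index - first page index + 1).  num_pages then
	 * adds one page buffer per skb fragment plus one reserved up front
	 * (page_buf[0]) for the RNDIS header, filled in by the RNDIS filter.
	 */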

	/* Allocate a netvsc packet based on # of frags. */
	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
			 (num_pages * sizeof(struct hv_page_buffer)) +
			 sizeof(struct rndis_filter_packet), GFP_ATOMIC);
	if (!packet) {
		/* out of memory, drop packet */
		netdev_err(net, "unable to allocate hv_netvsc_packet\n");

		/*
		 * The skb is freed here, so return NETDEV_TX_OK: returning
		 * NETDEV_TX_BUSY would make the stack requeue an skb that
		 * no longer exists.
		 */
		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	packet->extension = (void *)(unsigned long)packet +
				sizeof(struct hv_netvsc_packet) +
				    (num_pages * sizeof(struct hv_page_buffer));

	/* Setup the rndis header */
	packet->page_buf_cnt = num_pages;

	/* Initialize it from the skb */
	packet->total_data_buflen = skb->len;

	/* Start filling in the page buffers starting after RNDIS buffer. */
	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
	packet->page_buf[1].offset
		= (unsigned long)skb->data & (PAGE_SIZE - 1);
	if (npg_data == 1)
		packet->page_buf[1].len = skb_headlen(skb);
	else
		packet->page_buf[1].len = PAGE_SIZE
			- packet->page_buf[1].offset;

	for (i = 2; i <= npg_data; i++) {
		packet->page_buf[i].pfn = virt_to_phys(skb->data
			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
		packet->page_buf[i].offset = 0;
		packet->page_buf[i].len = PAGE_SIZE;
	}
	if (npg_data > 1)
		packet->page_buf[npg_data].len = (((unsigned long)skb->data
			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;

	/* Additional fragments are after SKB data */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		packet->page_buf[i+npg_data+1].pfn =
			page_to_pfn(skb_frag_page(f));
		packet->page_buf[i+npg_data+1].offset = f->page_offset;
		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
	}

	/* Set the completion routine */
	packet->completion.send.send_completion = netvsc_xmit_completion;
	packet->completion.send.send_completion_ctx = packet;
	packet->completion.send.send_completion_tid = (unsigned long)skb;

	ret = rndis_filter_send(net_device_ctx->device_ctx,
				  packet);
	if (ret == 0) {
		net->stats.tx_bytes += skb->len;
		net->stats.tx_packets++;
	} else {
		/* we are shutting down or bus overloaded, just drop packet */
		net->stats.tx_dropped++;
		kfree(packet);
		dev_kfree_skb_any(skb);
	}

	/*
	 * The packet was either sent or dropped (and freed) above, so
	 * report NETDEV_TX_OK either way rather than asking the stack to
	 * retransmit a freed skb.
	 */
	return NETDEV_TX_OK;
}

/*
 * netvsc_linkstatus_callback - Link up/down notification
 */
void netvsc_linkstatus_callback(struct hv_device *device_obj,
				       unsigned int status)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;

	if (!net) {
		pr_err("got link status but net device not initialized yet\n");
		return;
	}

	if (status == 1) {
		netif_carrier_on(net);
		netif_wake_queue(net);
		ndev_ctx = netdev_priv(net);
		schedule_delayed_work(&ndev_ctx->dwork, 0);
		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
	} else {
		netif_carrier_off(net);
		netif_stop_queue(net);
	}
}

/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
				struct hv_netvsc_packet *packet)
{
	struct net_device *net;
	struct sk_buff *skb;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;

	if (!net) {
		pr_err("got receive callback but net device not initialized yet\n");
		return 0;
	}

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return 0;
	}

	/*
	 * Copy to skb. This copy is needed here since the memory pointed by
	 * hv_netvsc_packet cannot be deallocated
	 */
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
		packet->total_data_buflen);

	skb->protocol = eth_type_trans(skb, net);
	skb->ip_summed = CHECKSUM_NONE;

	net->stats.rx_packets++;
	net->stats.rx_bytes += skb->len;

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}

static void netvsc_get_drvinfo(struct net_device *net,
			       struct ethtool_drvinfo *info)
{
	strcpy(info->driver, "hv_netvsc");
	strcpy(info->version, HV_DRV_VERSION);
	strcpy(info->fw_version, "N/A");
}

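/*
 * An MTU change requires re-creating the RNDIS device: stop the queue
 * and the delayed work, remove the filter device, update ndev->mtu,
 * then add the device back with the configured ring size.  Hosts that
 * speak NVSP protocol version 2 accept MTUs up to NETVSC_MTU; older
 * hosts are limited to the standard Ethernet payload (ETH_DATA_LEN).
 */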
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev =  ndevctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
	struct netvsc_device_info device_info;
	int limit = ETH_DATA_LEN;

	if (nvdev == NULL || nvdev->destroy)
		return -ENODEV;

	if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
		limit = NETVSC_MTU;

	if (mtu < 68 || mtu > limit)
		return -EINVAL;

	nvdev->start_remove = true;
	cancel_delayed_work_sync(&ndevctx->dwork);
	netif_stop_queue(ndev);
	rndis_filter_device_remove(hdev);

	ndev->mtu = mtu;

	ndevctx->device_ctx = hdev;
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	rndis_filter_device_add(hdev, &device_info);
	netif_wake_queue(ndev);

	return 0;
}

static const struct ethtool_ops ethtool_ops = {
	.get_drvinfo	= netvsc_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

static const struct net_device_ops device_ops = {
	.ndo_open =			netvsc_open,
	.ndo_stop =			netvsc_close,
	.ndo_start_xmit =		netvsc_start_xmit,
	.ndo_set_rx_mode =		netvsc_set_multicast_list,
	.ndo_change_mtu =		netvsc_change_mtu,
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_set_mac_address =		eth_mac_addr,
};

/*
 * Send GARP packet to network peers after migrations.
 * After Quick Migration, the network is not immediately operational in the
 * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
 * another netif_notify_peers() into a delayed work, otherwise GARP packet
 * will not be sent after quick migration, and cause network disconnection.
 */
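/*
 * Note: netvsc_linkstatus_callback() schedules this work twice, once
 * immediately and once ~20ms later, presumably so that a notification
 * sent before the link is fully usable gets a second chance.
 */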
static void netvsc_send_garp(struct work_struct *w)
{
	struct net_device_context *ndev_ctx;
	struct net_device *net;
	struct netvsc_device *net_device;

	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
	net = net_device->ndev;
	netif_notify_peers(net);
}


static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	int ret;

	net = alloc_etherdev(sizeof(struct net_device_context));
	if (!net)
		return -ENOMEM;

	/* Set initial state */
	netif_carrier_off(net);

	net_device_ctx = netdev_priv(net);
	net_device_ctx->device_ctx = dev;
	hv_set_drvdata(dev, net);
	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);

	net->netdev_ops = &device_ops;

	/* TODO: Add GSO and Checksum offload */
	net->hw_features = NETIF_F_SG;
	net->features = NETIF_F_SG;

	SET_ETHTOOL_OPS(net, &ethtool_ops);
	SET_NETDEV_DEV(net, &dev->device);

	ret = register_netdev(net);
	if (ret != 0) {
		pr_err("Unable to register netdev.\n");
		free_netdev(net);
		goto out;
	}

	/* Notify the netvsc driver of the new device */
	device_info.ring_size = ring_size;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		unregister_netdev(net);
		free_netdev(net);
		hv_set_drvdata(dev, NULL);
		return ret;
	}

	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	netif_carrier_on(net);

out:
	return ret;
}

static int netvsc_remove(struct hv_device *dev)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(dev);
	net = net_device->ndev;

	if (net == NULL) {
		dev_err(&dev->device, "No net device to remove\n");
		return 0;
	}

	net_device->start_remove = true;

	ndev_ctx = netdev_priv(net);
	cancel_delayed_work_sync(&ndev_ctx->dwork);

	/* Stop outbound asap */
	netif_stop_queue(net);

	unregister_netdev(net);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
	rndis_filter_device_remove(dev);

	free_netdev(net);
	return 0;
}

static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
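	/* f8615163-df3e-46c5-913f-f2d2f965ed0e, fields in little-endian order */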
	{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		       0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);

/* The one and only one */
static struct hv_driver netvsc_drv = {
	.name = "netvsc",
	.id_table = id_table,
	.probe = netvsc_probe,
	.remove = netvsc_remove,
};

static void __exit netvsc_drv_exit(void)
{
	vmbus_driver_unregister(&netvsc_drv);
}

static int __init netvsc_drv_init(void)
{
	return vmbus_driver_register(&netvsc_drv);
}

MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");

module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);