/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <net/busy_poll.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/rculist.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include "mlx4_en.h"

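/* Allocate a page (or a higher-order compound page) for RX fragments, map it
 * for DMA towards the device, and pre-charge its refcount with one reference
 * per fragment that fits in it.
 */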
static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
			    struct mlx4_en_rx_alloc *page_alloc,
			    const struct mlx4_en_frag_info *frag_info,
			    gfp_t _gfp)
{
	int order;
	struct page *page;
	dma_addr_t dma;

	for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
		gfp_t gfp = _gfp;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN;
		page = alloc_pages(gfp, order);
		if (likely(page))
			break;
		if (--order < 0 ||
		    ((PAGE_SIZE << order) < frag_info->frag_size))
			return -ENOMEM;
	}
	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
			   PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(priv->ddev, dma)) {
		put_page(page);
		return -ENOMEM;
	}
	page_alloc->size = PAGE_SIZE << order;
	page_alloc->page = page;
	page_alloc->dma = dma;
	page_alloc->offset = frag_info->frag_align;
	/* Not doing get_page() for each frag is a big win
	 * on asymmetric workloads.
	 */
	atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride);
	return 0;
}

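/* Populate one RX descriptor with fragments taken from the per-ring page
 * allocators, advancing each allocator and replacing its page when the
 * current one can no longer hold a full fragment stride.
 */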
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
			       struct mlx4_en_rx_desc *rx_desc,
			       struct mlx4_en_rx_alloc *frags,
			       struct mlx4_en_rx_alloc *ring_alloc,
			       gfp_t gfp)
{
	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
	const struct mlx4_en_frag_info *frag_info;
	struct page *page;
	dma_addr_t dma;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		frag_info = &priv->frag_info[i];
		page_alloc[i] = ring_alloc[i];
		page_alloc[i].offset += frag_info->frag_stride;
		if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size)
			continue;
		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
			goto out;
	}

	for (i = 0; i < priv->num_frags; i++) {
		frags[i] = ring_alloc[i];
		dma = ring_alloc[i].dma + ring_alloc[i].offset;
		ring_alloc[i] = page_alloc[i];
		rx_desc->data[i].addr = cpu_to_be64(dma);
	}

	return 0;

out:
	while (i--) {
		frag_info = &priv->frag_info[i];
		if (page_alloc[i].page != ring_alloc[i].page) {
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
				page_alloc[i].size, PCI_DMA_FROMDEVICE);
			page = page_alloc[i].page;
			atomic_set(&page->_count, 1);
			put_page(page);
		}
	}
	return -ENOMEM;
}

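/* Release a single RX fragment: unmap the page if this fragment was the last
 * one carved from it, then drop the fragment's page reference.
 */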
static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_alloc *frags,
			      int i)
{
	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

	if (frags[i].offset + frag_info->frag_stride > frags[i].size)
		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size,
					 PCI_DMA_FROMDEVICE);

	if (frags[i].page)
		put_page(frags[i].page);
}

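/* Allocate the initial page for each fragment allocator of a ring,
 * unwinding the pages allocated so far on failure.
 */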
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
{
	int i;
	struct mlx4_en_rx_alloc *page_alloc;

	for (i = 0; i < priv->num_frags; i++) {
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

		if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
				     frag_info, GFP_KERNEL))
			goto out;
	}
	return 0;

out:
	while (i--) {
		struct page *page;

		page_alloc = &ring->page_alloc[i];
		dma_unmap_page(priv->ddev, page_alloc->dma,
			       page_alloc->size, PCI_DMA_FROMDEVICE);
		page = page_alloc->page;
		atomic_set(&page->_count, 1);
		put_page(page);
		page_alloc->page = NULL;
	}
	return -ENOMEM;
}

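/* Unmap and release the pages still held by a ring's page allocators. */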
static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

		page_alloc = &ring->page_alloc[i];
		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
		       i, page_count(page_alloc->page));

		dma_unmap_page(priv->ddev, page_alloc->dma,
				page_alloc->size, PCI_DMA_FROMDEVICE);
		while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) {
			put_page(page_alloc->page);
			page_alloc->offset += frag_info->frag_stride;
		}
		page_alloc->page = NULL;
	}
}

static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	int possible_frags;
	int i;

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}

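/* Prepare the RX descriptor at @index by attaching freshly allocated
 * fragments to it.
 */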
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index,
				   gfp_t gfp)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct mlx4_en_rx_alloc *frags = ring->rx_info +
					(index << priv->log_rx_info);

	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}

static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_rx_alloc *frags;
	int nr;

	frags = ring->rx_info + (index << priv->log_rx_info);
	for (nr = 0; nr < priv->num_frags; nr++) {
		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
		mlx4_en_free_frag(priv, frags, nr);
	}
}

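/* Initial fill of all RX rings. If allocations stall above the minimum ring
 * size, every ring is shrunk to the largest power of two that could be
 * filled; below the minimum the fill fails.
 */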
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;
	int new_size;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = &priv->rx_ring[ring_ind];

			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size,
						    GFP_KERNEL)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					en_err(priv, "Failed to allocate enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size = rounddown_pow_of_two(ring->actual_size);
					en_warn(priv, "Only %d buffers allocated, reducing ring size to %d\n",
						ring->actual_size, new_size);
					goto reduce_rings;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
	return 0;

reduce_rings:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];
		while (ring->actual_size > new_size) {
			ring->actual_size--;
			ring->prod--;
			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
		}
	}

	return 0;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	int index;

	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
	       ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
	while (ring->cons != ring->prod) {
		index = ring->cons & ring->size_mask;
		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
		mlx4_en_free_rx_desc(priv, ring, index);
		++ring->cons;
	}
}

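/* Allocate the software and hardware resources backing one RX ring: the
 * rx_info fragment array and the HW work queue (including its doorbell).
 */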
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err = -ENOMEM;
	int tmp;

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct mlx4_en_rx_alloc));
	ring->rx_info = vmalloc(tmp);
	if (!ring->rx_info)
		return -ENOMEM;

	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_ring;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;

	return 0;

err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}

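/* Bring all RX rings into an operational state: initialize the descriptors
 * for the current fragment layout, seed the page allocators and fill the
 * rings with receive buffers.
 */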
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

		ring->stride = stride;
		if (ring->stride <= TXBB_SIZE)
			ring->buf += TXBB_SIZE;

		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			en_err(priv, "Failed initializing ring allocator\n");
			if (ring->stride <= TXBB_SIZE)
				ring->buf -= TXBB_SIZE;
			ring_ind--;
			goto err_allocator;
		}
	}
	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->size_mask = ring->actual_size - 1;
		mlx4_en_update_rx_prod_db(ring);
	}

	return 0;

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		if (priv->rx_ring[ring_ind].stride <= TXBB_SIZE)
			priv->rx_ring[ring_ind].buf -= TXBB_SIZE;
		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
	vfree(ring->rx_info);
	ring->rx_info = NULL;
#ifdef CONFIG_RFS_ACCEL
	mlx4_en_cleanup_filters(priv, ring);
#endif
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	mlx4_en_free_rx_buf(priv, ring);
	if (ring->stride <= TXBB_SIZE)
		ring->buf -= TXBB_SIZE;
	mlx4_en_destroy_allocator(priv, ring);
}


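/* Attach the fragments of a completed RX descriptor to @skb as paged frags,
 * trimming the last fragment to the actual received length. Returns the
 * number of fragments used, or 0 on failure.
 */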
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct mlx4_en_rx_alloc *frags,
				    struct sk_buff *skb,
				    int length)
{
	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		if (length <= frag_info->frag_prefix_size)
			break;
		if (!frags[nr].page)
			goto fail;

		dma = be64_to_cpu(rx_desc->data[nr].addr);
		dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
					DMA_FROM_DEVICE);

		/* Save page reference in skb */
		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
		skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
		skb_frags_rx[nr].page_offset = frags[nr].offset;
		skb->truesize += frag_info->frag_stride;
		frags[nr].page = NULL;
	}
	/* Adjust size of last fragment to match actual length */
	if (nr > 0)
		skb_frag_size_set(&skb_frags_rx[nr - 1],
			length - priv->frag_info[nr - 1].frag_prefix_size);
	return nr;

fail:
	while (nr > 0) {
		nr--;
		__skb_frag_unref(&skb_frags_rx[nr]);
	}
	return 0;
}


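/* Build an skb for a received packet: small packets are copied entirely into
 * the linear part, larger ones get their headers copied and the remaining
 * data attached as paged fragments.
 */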
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct mlx4_en_rx_alloc *frags,
				      unsigned int length)
{
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;

	/* Get pointer to first fragment so we could copy the headers into the
	 * (linear part of the) skb */
	va = page_address(frags[0].page) + frags[0].offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * sync buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_for_cpu(priv->ddev, dma, length,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		skb->tail += length;
	} else {
		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags,
							skb, length);
		if (unlikely(!used_frags)) {
			kfree_skb(skb);
			return NULL;
		}
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, HEADER_COPY_SIZE);
		skb->tail += HEADER_COPY_SIZE;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

		/* Adjust size of first fragment */
		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], HEADER_COPY_SIZE);
		skb->data_len = length - HEADER_COPY_SIZE;
	}
	return skb;
}

static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
{
	int i;
	int offset = ETH_HLEN;

	for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
		if (*(skb->data + offset) != (unsigned char) (i & 0xff))
			goto out_loopback;
	}
	/* Loopback found */
	priv->loopback_ok = 1;

out_loopback:
	dev_kfree_skb_any(skb);
}

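/* Top up a ring with fresh receive buffers after completions were consumed. */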
static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
				     struct mlx4_en_rx_ring *ring)
{
	int index = ring->prod & ring->size_mask;

	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
		if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC))
			break;
		ring->prod++;
		index = ring->prod & ring->size_mask;
	}
}

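/* Poll an RX completion queue: for each completed CQE validate the packet,
 * build (or GRO-merge) an skb and pass it up the stack, then refill the ring
 * and update the consumer index and producer doorbell. Returns the number of
 * packets processed, up to @budget.
 */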
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct mlx4_en_rx_alloc *frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;
	int factor = priv->cqe_factor;
	u64 timestamp;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[(index << factor) + factor];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				  ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/* Check if we need to drop the packet if SRIOV is not enabled
		 * and not performing the selftest or flb disabled
		 */
		if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
			struct ethhdr *ethh;
			dma_addr_t dma;
			/* Get pointer to first fragment since we haven't
			 * skb yet and cast it to ethhdr struct
			 */
			dma = be64_to_cpu(rx_desc->data[0].addr);
			dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
						DMA_FROM_DEVICE);
			ethh = (struct ethhdr *)(page_address(frags[0].page) +
						 frags[0].offset);

			if (is_multicast_ether_addr(ethh->h_dest)) {
				struct mlx4_mac_entry *entry;
				struct hlist_head *bucket;
				unsigned int mac_hash;

				/* Drop the packet, since HW loopback-ed it */
				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
				bucket = &priv->mac_hash[mac_hash];
				rcu_read_lock();
				hlist_for_each_entry_rcu(entry, bucket, hlist) {
					if (ether_addr_equal_64bits(entry->mac,
								    ethh->h_source)) {
						rcu_read_unlock();
						goto next;
					}
				}
				rcu_read_unlock();
			}
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		length -= ring->fcs_del;
		ring->bytes += length;
		ring->packets++;

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				ring->csum_ok++;
				/* This packet is eligible for GRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment
				 * - no LLS polling in progress
				 */
				if (!mlx4_en_cq_ll_polling(cq) &&
				    (dev->features & NETIF_F_GRO)) {
					struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
					if (!gro_skb)
						goto next;

					nr = mlx4_en_complete_rx_desc(priv,
						rx_desc, frags, gro_skb,
						length);
					if (!nr)
						goto next;

					skb_shinfo(gro_skb)->nr_frags = nr;
					gro_skb->len = length;
					gro_skb->data_len = length;
					gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

					if ((cqe->vlan_my_qpn &
					    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
					    (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
						u16 vid = be16_to_cpu(cqe->sl_vid);

						__vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
					}

					if (dev->features & NETIF_F_RXHASH)
						gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

					skb_record_rx_queue(gro_skb, cq->ring);

					if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
						timestamp = mlx4_en_get_cqe_ts(cqe);
						mlx4_en_fill_hwtstamps(mdev,
								       skb_hwtstamps(gro_skb),
								       timestamp);
					}

					napi_gro_frags(&cq->napi);
					goto next;
				}

				/* GRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
			} else {
				ip_summed = CHECKSUM_NONE;
				ring->csum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			ring->csum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, skb);
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		if (dev->features & NETIF_F_RXHASH)
			skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

		if ((be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_VLAN_PRESENT_MASK) &&
		    (dev->features & NETIF_F_HW_VLAN_CTAG_RX))
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid));

		if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
			timestamp = mlx4_en_get_cqe_ts(cqe);
			mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
					       timestamp);
		}

		skb_mark_ll(skb, &cq->napi);

		/* Push it up the stack */
		netif_receive_skb(skb);

next:
		for (nr = 0; nr < priv->num_frags; nr++)
			mlx4_en_free_frag(priv, frags, nr);

		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[(index << factor) + factor];
		if (++polled == budget)
			goto out;
	}

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	mlx4_en_refill_rx_buffers(priv, ring);
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}


void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (priv->port_up)
		napi_schedule(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int done;

	if (!mlx4_en_cq_lock_napi(cq))
		return budget;

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	mlx4_en_cq_unlock_napi(cq);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget)
		INC_PERF_COUNTER(priv->pstats.napi_quota);
	else {
		/* Done for now */
		napi_complete(napi);
		mlx4_en_arm_cq(priv, cq);
	}
	return done;
}

static const int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};

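/* Derive the RX fragment layout (sizes, prefixes, alignments and strides)
 * for the current MTU and store it in the private structure.
 */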
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
	int buf_size = 0;
	int i = 0;

	while (buf_size < eff_mtu) {
		priv->frag_info[i].frag_size =
			(eff_mtu > buf_size + frag_sizes[i]) ?
				frag_sizes[i] : eff_mtu - buf_size;
		priv->frag_info[i].frag_prefix_size = buf_size;
		if (!i)	{
			priv->frag_info[i].frag_align = NET_IP_ALIGN;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
		} else {
			priv->frag_info[i].frag_align = 0;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
		}
		buf_size += priv->frag_info[i].frag_size;
		i++;
	}

	priv->num_frags = i;
	priv->rx_skb_size = eff_mtu;
	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc));

	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
	       eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
		en_err(priv,
		       "  frag:%d - size:%d prefix:%d align:%d stride:%d\n",
		       i,
		       priv->frag_info[i].frag_size,
		       priv->frag_info[i].frag_prefix_size,
		       priv->frag_info[i].frag_align,
		       priv->frag_info[i].frag_stride);
	}
}

/* RSS related functions */

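/* Create one RSS receive QP bound to its RX ring's CQ and bring it to the
 * ready state.
 */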
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
				 struct mlx4_en_rx_ring *ring,
				 enum mlx4_qp_state *state,
				 struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
	if (err) {
		en_err(priv, "Failed to allocate qp #%x\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof *context);
	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
				qpn, ring->cqn, -1, context);
	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);

	/* Cancel FCS removal if FW allows */
	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
		context->param3 |= cpu_to_be32(1 << 29);
		ring->fcs_del = ETH_FCS_LEN;
	} else
		ring->fcs_del = 0;

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
	mlx4_en_update_rx_prod_db(ring);
out:
	kfree(context);
	return err;
}

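/* Reserve a QP number and allocate the drop QP; traffic steered to this QP
 * is silently discarded.
 */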
int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
{
	int err;
	u32 qpn;

	err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn);
	if (err) {
		en_err(priv, "Failed reserving drop qpn\n");
		return err;
	}
	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
	if (err) {
		en_err(priv, "Failed allocating drop qp\n");
		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
		return err;
	}

	return 0;
}

void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
{
	u32 qpn;

	qpn = priv->drop_qp.qpn;
	mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
}

/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_rss_context *rss_context;
	int rss_rings;
	void *ptr;
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
			MLX4_RSS_TCP_IPV6);
	int i, qpn;
	int err = 0;
	int good_qps = 0;
	static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC,
				0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD,
				0x593D56D9, 0xF3253C06, 0x2ADC1FFC};

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, -1, &context);

	if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
		rss_rings = priv->rx_ring_num;
	else
		rss_rings = priv->prof->rss_rings;

	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
	rss_context = ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	if (priv->mdev->profile.udp_rss) {
		rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
		rss_context->base_qpn_udp = rss_context->default_qpn;
	}
	rss_context->flags = rss_mask;
	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
	for (i = 0; i < 10; i++)
		rss_context->rss_key[i] = cpu_to_be32(rsskey[i]);

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}

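/* Tear down the RSS indirection QP and all per-ring receive QPs. */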
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);

	for (i = 0; i < priv->rx_ring_num; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}