svcsock.c 40.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
/*
 * linux/net/sunrpc/svcsock.c
 *
 * These are the RPC server socket internals.
 *
 * The server scheduling algorithm does not always distribute the load
 * evenly when servicing a single client. May need to modify the
 * svc_xprt_enqueue procedure...
 *
 * TCP support is largely untested and may be a little slow. The problem
 * is that we currently do two separate recvfrom's, one for the 4-byte
 * record length, and the second for the actual record. This could possibly
 * be improved by always reading a minimum size of around 100 bytes and
 * tucking any superfluous bytes away in a temporary store. Still, that
 * leaves write requests out in the rain. An alternative may be to peek at
 * the first skb in the queue, and if it matches the next TCP sequence
 * number, to extract the record marker. Yuck.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

22
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
23
24
25
26
27
28
29
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
30
#include <linux/tcp.h>
Linus Torvalds's avatar
Linus Torvalds committed
31
32
33
34
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
35
#include <linux/file.h>
36
#include <linux/freezer.h>
Linus Torvalds's avatar
Linus Torvalds committed
37
38
39
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
40
#include <net/ipv6.h>
41
#include <net/tcp.h>
42
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
43
44
45
46
#include <asm/uaccess.h>
#include <asm/ioctls.h>

#include <linux/sunrpc/types.h>
47
#include <linux/sunrpc/clnt.h>
Linus Torvalds's avatar
Linus Torvalds committed
48
#include <linux/sunrpc/xdr.h>
49
#include <linux/sunrpc/msg_prot.h>
Linus Torvalds's avatar
Linus Torvalds committed
50
51
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
52
#include <linux/sunrpc/xprt.h>
Linus Torvalds's avatar
Linus Torvalds committed
53

54
#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
Linus Torvalds's avatar
Linus Torvalds committed
55
56
57


/* Forward declarations for helpers that are referenced before their definition. */
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
					 int *errp, int flags);
static void		svc_udp_data_ready(struct sock *, int);
static int		svc_udp_recvfrom(struct svc_rqst *);
static int		svc_udp_sendto(struct svc_rqst *);
static void		svc_sock_detach(struct svc_xprt *);
static void		svc_tcp_sock_detach(struct svc_xprt *);
static void		svc_sock_free(struct svc_xprt *);

static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
					  struct net *, struct sockaddr *,
					  int, int);
69
70
71
72
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* One lock class key per address family: [0] is AF_INET, [1] is AF_INET6. */
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];

/*
 * Re-initialise the lock class and lock names of @sock's struct sock with
 * the NFSD-specific keys above, selected by address family.
 *
 * The socket must not be owned by a user context when this is called, and
 * only AF_INET and AF_INET6 sockets are supported; anything else is a BUG.
 */
static void svc_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	BUG_ON(sock_owned_by_user(sk));
	switch (sk->sk_family) {
	case AF_INET:
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
					      &svc_slock_key[0],
					      "sk_xprt.xpt_lock-AF_INET-NFSD",
					      &svc_key[0]);
		break;

	case AF_INET6:
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
					      &svc_slock_key[1],
					      "sk_xprt.xpt_lock-AF_INET6-NFSD",
					      &svc_key[1]);
		break;

	default:
		BUG();
	}
}
#else
/* Without CONFIG_DEBUG_LOCK_ALLOC there is nothing to reclassify. */
static void svc_reclassify_socket(struct socket *sock)
{
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
102
103
104
/*
 * Release an skbuff after use
 */
105
static void svc_release_skb(struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
106
{
107
	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
Linus Torvalds's avatar
Linus Torvalds committed
108
109

	if (skb) {
110
111
		struct svc_sock *svsk =
			container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
112
		rqstp->rq_xprt_ctxt = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
113
114

		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
115
		skb_free_datagram_locked(svsk->sk_sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
116
117
118
	}
}

119
120
121
122
/* Large enough to hold the packet-info payload of either address family. */
union svc_pktinfo_u {
	struct in_pktinfo pkti;
	struct in6_pktinfo pkti6;
};
/* Control-message buffer space needed for one packet-info cmsg. */
#define SVC_PKTINFO_SPACE \
	CMSG_SPACE(sizeof(union svc_pktinfo_u))
125
126
127

static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
128
129
130
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	switch (svsk->sk_sk->sk_family) {
131
132
133
134
135
136
137
138
139
140
	case AF_INET: {
			struct in_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IP;
			cmh->cmsg_type = IP_PKTINFO;
			pki->ipi_ifindex = 0;
			pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
141

142
143
144
145
146
147
148
149
150
151
152
153
154
155
	case AF_INET6: {
			struct in6_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IPV6;
			cmh->cmsg_type = IPV6_PKTINFO;
			pki->ipi6_ifindex = 0;
			ipv6_addr_copy(&pki->ipi6_addr,
					&rqstp->rq_daddr.addr6);
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
156
/*
157
 * send routine intended to be shared by the fore- and back-channel
Linus Torvalds's avatar
Linus Torvalds committed
158
 */
159
160
161
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
		    struct page *headpage, unsigned long headoffset,
		    struct page *tailpage, unsigned long tailoffset)
Linus Torvalds's avatar
Linus Torvalds committed
162
163
164
165
166
167
168
{
	int		result;
	int		size;
	struct page	**ppage = xdr->pages;
	size_t		base = xdr->page_base;
	unsigned int	pglen = xdr->page_len;
	unsigned int	flags = MSG_MORE;
169
170
	int		slen;
	int		len = 0;
Linus Torvalds's avatar
Linus Torvalds committed
171
172
173
174
175
176

	slen = xdr->len;

	/* send head */
	if (slen == xdr->head[0].iov_len)
		flags = 0;
177
	len = kernel_sendpage(sock, headpage, headoffset,
178
				  xdr->head[0].iov_len, flags);
Linus Torvalds's avatar
Linus Torvalds committed
179
180
181
182
183
184
185
186
187
188
189
	if (len != xdr->head[0].iov_len)
		goto out;
	slen -= xdr->head[0].iov_len;
	if (slen == 0)
		goto out;

	/* send page data */
	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
	while (pglen > 0) {
		if (slen == size)
			flags = 0;
190
		result = kernel_sendpage(sock, *ppage, base, size, flags);
Linus Torvalds's avatar
Linus Torvalds committed
191
192
193
194
195
196
197
198
199
200
		if (result > 0)
			len += result;
		if (result != size)
			goto out;
		slen -= size;
		pglen -= size;
		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
		base = 0;
		ppage++;
	}
201

Linus Torvalds's avatar
Linus Torvalds committed
202
203
	/* send tail */
	if (xdr->tail[0].iov_len) {
204
205
		result = kernel_sendpage(sock, tailpage, tailoffset,
				   xdr->tail[0].iov_len, 0);
Linus Torvalds's avatar
Linus Torvalds committed
206
207
208
		if (result > 0)
			len += result;
	}
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252

out:
	return len;
}


/*
 * Generic sendto routine
 */
static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct socket	*sock = svsk->sk_sock;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	int		len = 0;
	unsigned long tailoff;
	unsigned long headoff;
	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);

	if (rqstp->rq_prot == IPPROTO_UDP) {
		struct msghdr msg = {
			.msg_name	= &rqstp->rq_addr,
			.msg_namelen	= rqstp->rq_addrlen,
			.msg_control	= cmh,
			.msg_controllen	= sizeof(buffer),
			.msg_flags	= MSG_MORE,
		};

		svc_set_cmsg_data(rqstp, cmh);

		if (sock_sendmsg(sock, &msg, 0) < 0)
			goto out;
	}

	tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
	headoff = 0;
	len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
			       rqstp->rq_respages[0], tailoff);

Linus Torvalds's avatar
Linus Torvalds committed
253
out:
254
	dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
255
		svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
256
		xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
257
258
259
260

	return len;
}

261
262
263
/*
 * Report socket names for nfsdfs
 */
264
static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
265
{
Chuck Lever's avatar
Chuck Lever committed
266
267
268
	const struct sock *sk = svsk->sk_sk;
	const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
							"udp" : "tcp";
269
270
	int len;

Chuck Lever's avatar
Chuck Lever committed
271
	switch (sk->sk_family) {
272
273
	case PF_INET:
		len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
Chuck Lever's avatar
Chuck Lever committed
274
				proto_name,
275
276
				&inet_sk(sk)->inet_rcv_saddr,
				inet_sk(sk)->inet_num);
277
		break;
278
279
	case PF_INET6:
		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
Chuck Lever's avatar
Chuck Lever committed
280
281
				proto_name,
				&inet6_sk(sk)->rcv_saddr,
282
				inet_sk(sk)->inet_num);
283
284
		break;
	default:
285
		len = snprintf(buf, remaining, "*unknown-%d*\n",
Chuck Lever's avatar
Chuck Lever committed
286
				sk->sk_family);
287
	}
288
289
290
291

	if (len >= remaining) {
		*buf = '\0';
		return -ENAMETOOLONG;
292
293
294
295
	}
	return len;
}

296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
/**
 * svc_sock_names - construct a list of listener names in a string
 * @serv: pointer to RPC service
 * @buf: pointer to a buffer to fill in with socket names
 * @buflen: size of the buffer to be filled
 * @toclose: pointer to '\0'-terminated C string containing the name
 *		of a listener to be closed
 *
 * Fills in @buf with a '\n'-separated list of names of listener
 * sockets.  If @toclose is not NULL, the socket named by @toclose
 * is closed, and is not included in the output list.
 *
 * Returns positive length of the socket name string, or a negative
 * errno value on error.
 */
int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
		   const char *toclose)
313
{
314
	struct svc_sock *svsk, *closesk = NULL;
315
316
317
318
	int len = 0;

	if (!serv)
		return 0;
319

320
	spin_lock_bh(&serv->sv_lock);
321
	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
322
323
324
325
326
		int onelen = svc_one_sock_name(svsk, buf + len, buflen - len);
		if (onelen < 0) {
			len = onelen;
			break;
		}
327
		if (toclose && strcmp(toclose, buf + len) == 0) {
328
			closesk = svsk;
329
330
			svc_xprt_get(&closesk->sk_xprt);
		} else
331
			len += onelen;
332
	}
333
	spin_unlock_bh(&serv->sv_lock);
334

335
	if (closesk) {
336
337
338
		/* Should unregister with portmap, but you cannot
		 * unregister just one protocol...
		 */
339
		svc_close_xprt(&closesk->sk_xprt);
340
341
		svc_xprt_put(&closesk->sk_xprt);
	} else if (toclose)
342
		return -ENOENT;
343
344
	return len;
}
345
EXPORT_SYMBOL_GPL(svc_sock_names);
346

Linus Torvalds's avatar
Linus Torvalds committed
347
348
349
/*
 * Check input queue length
 */
350
static int svc_recv_available(struct svc_sock *svsk)
Linus Torvalds's avatar
Linus Torvalds committed
351
352
353
354
{
	struct socket	*sock = svsk->sk_sock;
	int		avail, err;

355
	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
Linus Torvalds's avatar
Linus Torvalds committed
356
357
358
359
360
361
362

	return (err >= 0)? avail : err;
}

/*
 * Generic recvfrom routine.
 */
363
364
static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
			int buflen)
Linus Torvalds's avatar
Linus Torvalds committed
365
{
366
367
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
368
369
370
371
	struct msghdr msg = {
		.msg_flags	= MSG_DONTWAIT,
	};
	int len;
Linus Torvalds's avatar
Linus Torvalds committed
372

373
374
	rqstp->rq_xprt_hlen = 0;

375
376
	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
				msg.msg_flags);
Linus Torvalds's avatar
Linus Torvalds committed
377
378

	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
379
		svsk, iov[0].iov_base, iov[0].iov_len, len);
Linus Torvalds's avatar
Linus Torvalds committed
380
381
382
383
384
385
	return len;
}

/*
 * Set socket snd and rcv buffer lengths
 */
386
387
static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
				unsigned int rcv)
Linus Torvalds's avatar
Linus Torvalds committed
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
{
#if 0
	mm_segment_t	oldfs;
	oldfs = get_fs(); set_fs(KERNEL_DS);
	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
			(char*)&snd, sizeof(snd));
	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
			(char*)&rcv, sizeof(rcv));
#else
	/* sock_setsockopt limits use to sysctl_?mem_max,
	 * which isn't acceptable.  Until that is made conditional
	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
	 * DaveM said I could!
	 */
	lock_sock(sock->sk);
	sock->sk->sk_sndbuf = snd * 2;
	sock->sk->sk_rcvbuf = rcv * 2;
405
	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
406
	sock->sk->sk_write_space(sock->sk);
Linus Torvalds's avatar
Linus Torvalds committed
407
408
409
410
411
412
	release_sock(sock->sk);
#endif
}
/*
 * INET callback when data has been received on the socket.
 */
413
static void svc_udp_data_ready(struct sock *sk, int count)
Linus Torvalds's avatar
Linus Torvalds committed
414
{
415
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
416

417
418
	if (svsk) {
		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
419
420
421
			svsk, sk, count,
			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
422
		svc_xprt_enqueue(&svsk->sk_xprt);
423
	}
Eric Dumazet's avatar
Eric Dumazet committed
424
425
	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
		wake_up_interruptible(sk_sleep(sk));
Linus Torvalds's avatar
Linus Torvalds committed
426
427
428
429
430
}

/*
 * INET callback when space is newly available on the socket.
 */
431
static void svc_write_space(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
432
433
434
435
436
{
	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);

	if (svsk) {
		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
437
			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
438
		svc_xprt_enqueue(&svsk->sk_xprt);
Linus Torvalds's avatar
Linus Torvalds committed
439
440
	}

Eric Dumazet's avatar
Eric Dumazet committed
441
	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) {
442
		dprintk("RPC svc_write_space: someone sleeping on %p\n",
Linus Torvalds's avatar
Linus Torvalds committed
443
		       svsk);
Eric Dumazet's avatar
Eric Dumazet committed
444
		wake_up_interruptible(sk_sleep(sk));
Linus Torvalds's avatar
Linus Torvalds committed
445
446
447
	}
}

448
449
450
451
452
453
454
455
456
/*
 * TCP write_space callback: once the stream has at least its minimum
 * writable space again, drop SOCK_NOSPACE on the owning socket (if there
 * is one), then run the generic svc_write_space() handling.
 */
static void svc_tcp_write_space(struct sock *sk)
{
	struct socket *owner = sk->sk_socket;
	int has_room = sk_stream_wspace(sk) >= sk_stream_min_wspace(sk);

	if (has_room && owner)
		clear_bit(SOCK_NOSPACE, &owner->flags);

	svc_write_space(sk);
}

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
/*
 * See net/ipv4/ip_sockglue.c : ip_cmsg_recv_pktinfo
 *
 * Copy the IPv4 destination address out of an IP_PKTINFO control message
 * into rqstp->rq_daddr. Returns 1 on success, 0 if @cmh is some other
 * control message type.
 */
static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in_pktinfo *info;

	if (cmh->cmsg_type != IP_PKTINFO)
		return 0;

	info = CMSG_DATA(cmh);
	rqstp->rq_daddr.addr.s_addr = info->ipi_spec_dst.s_addr;
	return 1;
}

/*
 * See net/ipv6/datagram.c : datagram_recv_ctl
 *
 * Copy the IPv6 destination address out of an IPV6_PKTINFO control message
 * into rqstp->rq_daddr. Returns 1 on success, 0 if @cmh is some other
 * control message type.
 */
static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in6_pktinfo *info;

	if (cmh->cmsg_type != IPV6_PKTINFO)
		return 0;

	info = CMSG_DATA(cmh);
	ipv6_addr_copy(&rqstp->rq_daddr.addr6, &info->ipi6_addr);
	return 1;
}

483
484
485
486
487
488
489
/*
 * Copy the UDP datagram's destination address to the rqstp structure.
 * The 'destination' address in this case is the address to which the
 * peer sent the datagram, i.e. our local address. For multihomed
 * hosts, this can change from msg to msg. Note that only the IP
 * address changes, the port number should remain the same.
 */
490
491
static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
				    struct cmsghdr *cmh)
492
{
493
494
495
496
497
	switch (cmh->cmsg_level) {
	case SOL_IP:
		return svc_udp_get_dest_address4(rqstp, cmh);
	case SOL_IPV6:
		return svc_udp_get_dest_address6(rqstp, cmh);
498
	}
499
500

	return 0;
501
502
}

Linus Torvalds's avatar
Linus Torvalds committed
503
504
505
/*
 * Receive a datagram from a UDP socket.
 */
506
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
507
{
508
509
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
510
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
Linus Torvalds's avatar
Linus Torvalds committed
511
	struct sk_buff	*skb;
512
513
514
515
516
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
517
518
519
520
521
522
	struct msghdr msg = {
		.msg_name = svc_addr(rqstp),
		.msg_control = cmh,
		.msg_controllen = sizeof(buffer),
		.msg_flags = MSG_DONTWAIT,
	};
523
524
	size_t len;
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
525

526
	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
Linus Torvalds's avatar
Linus Torvalds committed
527
528
529
	    /* udp sockets need large rcvbuf as all pending
	     * requests are still in that buffer.  sndbuf must
	     * also be large enough that there is enough space
530
531
532
533
	     * for one reply per thread.  We count all threads
	     * rather than threads in a particular pool, which
	     * provides an upper bound on the number of threads
	     * which will access the socket.
Linus Torvalds's avatar
Linus Torvalds committed
534
535
	     */
	    svc_sock_setbufsize(svsk->sk_sock,
536
537
				(serv->sv_nrthreads+3) * serv->sv_max_mesg,
				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
Linus Torvalds's avatar
Linus Torvalds committed
538

539
	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
540
541
542
543
544
545
546
547
548
549
	skb = NULL;
	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
			     0, 0, MSG_PEEK | MSG_DONTWAIT);
	if (err >= 0)
		skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);

	if (skb == NULL) {
		if (err != -EAGAIN) {
			/* possibly an icmp error */
			dprintk("svc: recvfrom returned error %d\n", -err);
550
			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
551
		}
552
		return -EAGAIN;
Linus Torvalds's avatar
Linus Torvalds committed
553
	}
554
	len = svc_addr_len(svc_addr(rqstp));
555
556
	if (len == 0)
		return -EAFNOSUPPORT;
557
	rqstp->rq_addrlen = len;
558
559
	if (skb->tstamp.tv64 == 0) {
		skb->tstamp = ktime_get_real();
560
		/* Don't enable netstamp, sunrpc doesn't
Linus Torvalds's avatar
Linus Torvalds committed
561
562
		   need that much accuracy */
	}
563
	svsk->sk_sk->sk_stamp = skb->tstamp;
564
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
Linus Torvalds's avatar
Linus Torvalds committed
565
566
567
568

	len  = skb->len - sizeof(struct udphdr);
	rqstp->rq_arg.len = len;

569
	rqstp->rq_prot = IPPROTO_UDP;
570

571
	if (!svc_udp_get_dest_address(rqstp, cmh)) {
572
		if (net_ratelimit())
573
574
575
576
			printk(KERN_WARNING
				"svc: received unknown control message %d/%d; "
				"dropping RPC reply datagram\n",
					cmh->cmsg_level, cmh->cmsg_type);
577
		skb_free_datagram_locked(svsk->sk_sk, skb);
578
579
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
580
581
582
583
584
585
586

	if (skb_is_nonlinear(skb)) {
		/* we have to copy */
		local_bh_disable();
		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
			local_bh_enable();
			/* checksum error */
587
			skb_free_datagram_locked(svsk->sk_sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
588
589
590
			return 0;
		}
		local_bh_enable();
591
		skb_free_datagram_locked(svsk->sk_sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
592
593
	} else {
		/* we can use it in-place */
594
595
		rqstp->rq_arg.head[0].iov_base = skb->data +
			sizeof(struct udphdr);
Linus Torvalds's avatar
Linus Torvalds committed
596
		rqstp->rq_arg.head[0].iov_len = len;
597
		if (skb_checksum_complete(skb)) {
598
			skb_free_datagram_locked(svsk->sk_sk, skb);
599
			return 0;
Linus Torvalds's avatar
Linus Torvalds committed
600
		}
601
		rqstp->rq_xprt_ctxt = skb;
Linus Torvalds's avatar
Linus Torvalds committed
602
603
604
605
606
607
	}

	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
608
		rqstp->rq_respages = rqstp->rq_pages+1;
Linus Torvalds's avatar
Linus Torvalds committed
609
610
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
611
		rqstp->rq_respages = rqstp->rq_pages + 1 +
612
			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
Linus Torvalds's avatar
Linus Torvalds committed
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
	}

	if (serv->sv_stats)
		serv->sv_stats->netudpcnt++;

	return len;
}

/*
 * Send the reply in rq_res over UDP. A first attempt that comes back as
 * -ECONNREFUSED is retried exactly once; the result of the last attempt
 * is returned.
 */
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	int sent = svc_sendto(rqstp, &rqstp->rq_res);

	if (sent != -ECONNREFUSED)
		return sent;

	/* ICMP error on earlier request. */
	return svc_sendto(rqstp, &rqstp->rq_res);
}

Tom Tucker's avatar
Tom Tucker committed
634
635
636
637
/*
 * No-op xpo_prep_reply_hdr hook for the UDP transport (installed in
 * svc_udp_ops): nothing is added to the reply for UDP.
 */
static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

638
639
640
static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
641
	struct svc_serv	*serv = xprt->xpt_server;
642
643
644
645
646
647
648
	unsigned long required;

	/*
	 * Set the SOCK_NOSPACE flag before checking the available
	 * sock space.
	 */
	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
Tom Tucker's avatar
Tom Tucker committed
649
	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
650
651
652
653
654
655
	if (required*2 > sock_wspace(svsk->sk_sk))
		return 0;
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;
}

656
657
658
659
660
661
/*
 * xpo_accept hook for the UDP transport (installed in svc_udp_ops).
 * Calling it is treated as a kernel bug; presumably because a UDP socket
 * never has connections to accept.
 */
static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
{
	BUG();
	return NULL;
}

662
static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
663
				       struct net *net,
664
665
666
				       struct sockaddr *sa, int salen,
				       int flags)
{
667
	return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
668
669
}

670
static struct svc_xprt_ops svc_udp_ops = {
671
	.xpo_create = svc_udp_create,
672
673
	.xpo_recvfrom = svc_udp_recvfrom,
	.xpo_sendto = svc_udp_sendto,
674
	.xpo_release_rqst = svc_release_skb,
675
676
	.xpo_detach = svc_sock_detach,
	.xpo_free = svc_sock_free,
Tom Tucker's avatar
Tom Tucker committed
677
	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
678
	.xpo_has_wspace = svc_udp_has_wspace,
679
	.xpo_accept = svc_udp_accept,
680
681
682
683
};

static struct svc_xprt_class svc_udp_class = {
	.xcl_name = "udp",
684
	.xcl_owner = THIS_MODULE,
685
	.xcl_ops = &svc_udp_ops,
686
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
687
688
};

689
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
Linus Torvalds's avatar
Linus Torvalds committed
690
{
691
	int err, level, optname, one = 1;
692

693
	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
694
	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
695
696
697
698
	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
	svsk->sk_sk->sk_write_space = svc_write_space;

	/* initialise setting must have enough space to
699
	 * receive and respond to one request.
Linus Torvalds's avatar
Linus Torvalds committed
700
701
702
	 * svc_udp_recvfrom will re-adjust if necessary
	 */
	svc_sock_setbufsize(svsk->sk_sock,
703
704
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
Linus Torvalds's avatar
Linus Torvalds committed
705

706
707
	/* data might have come in before data_ready set up */
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
708
	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
709
710

	/* make sure we get destination address info */
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
	switch (svsk->sk_sk->sk_family) {
	case AF_INET:
		level = SOL_IP;
		optname = IP_PKTINFO;
		break;
	case AF_INET6:
		level = SOL_IPV6;
		optname = IPV6_RECVPKTINFO;
		break;
	default:
		BUG();
	}
	err = kernel_setsockopt(svsk->sk_sock, level, optname,
					(char *)&one, sizeof(one));
	dprintk("svc: kernel_setsockopt returned %d\n", err);
Linus Torvalds's avatar
Linus Torvalds committed
726
727
728
729
730
731
}

/*
 * A data_ready event on a listening socket means there's a connection
 * pending. Do not use state_change as a substitute for it.
 */
732
static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
Linus Torvalds's avatar
Linus Torvalds committed
733
{
734
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
735
736

	dprintk("svc: socket %p TCP (listen) state change %d\n",
737
		sk, sk->sk_state);
Linus Torvalds's avatar
Linus Torvalds committed
738

739
740
741
742
743
744
745
746
747
748
749
750
	/*
	 * This callback may called twice when a new connection
	 * is established as a child socket inherits everything
	 * from a parent LISTEN socket.
	 * 1) data_ready method of the parent socket will be called
	 *    when one of child sockets become ESTABLISHED.
	 * 2) data_ready method of the child socket may be called
	 *    when it receives data before the socket is accepted.
	 * In case of 2, we should ignore it silently.
	 */
	if (sk->sk_state == TCP_LISTEN) {
		if (svsk) {
751
			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
752
			svc_xprt_enqueue(&svsk->sk_xprt);
753
754
		} else
			printk("svc: socket %p: no user data\n", sk);
Linus Torvalds's avatar
Linus Torvalds committed
755
	}
756

Eric Dumazet's avatar
Eric Dumazet committed
757
758
	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
		wake_up_interruptible_all(sk_sleep(sk));
Linus Torvalds's avatar
Linus Torvalds committed
759
760
761
762
763
}

/*
 * A state change on a connected socket means it's dying or dead.
 */
764
static void svc_tcp_state_change(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
765
{
766
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
767
768

	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
769
		sk, sk->sk_state, sk->sk_user_data);
Linus Torvalds's avatar
Linus Torvalds committed
770

771
	if (!svsk)
Linus Torvalds's avatar
Linus Torvalds committed
772
		printk("svc: socket %p: no user data\n", sk);
773
	else {
774
		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
775
		svc_xprt_enqueue(&svsk->sk_xprt);
Linus Torvalds's avatar
Linus Torvalds committed
776
	}
Eric Dumazet's avatar
Eric Dumazet committed
777
778
	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
		wake_up_interruptible_all(sk_sleep(sk));
Linus Torvalds's avatar
Linus Torvalds committed
779
780
}

781
static void svc_tcp_data_ready(struct sock *sk, int count)
Linus Torvalds's avatar
Linus Torvalds committed
782
{
783
	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
784
785

	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
786
787
		sk, sk->sk_user_data);
	if (svsk) {
788
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
789
		svc_xprt_enqueue(&svsk->sk_xprt);
790
	}
Eric Dumazet's avatar
Eric Dumazet committed
791
792
	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
		wake_up_interruptible(sk_sleep(sk));
Linus Torvalds's avatar
Linus Torvalds committed
793
794
795
796
797
}

/*
 * Accept a TCP connection
 */
798
static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
Linus Torvalds's avatar
Linus Torvalds committed
799
{
800
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
801
802
	struct sockaddr_storage addr;
	struct sockaddr	*sin = (struct sockaddr *) &addr;
803
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
Linus Torvalds's avatar
Linus Torvalds committed
804
805
806
807
	struct socket	*sock = svsk->sk_sock;
	struct socket	*newsock;
	struct svc_sock	*newsvsk;
	int		err, slen;
808
	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
Linus Torvalds's avatar
Linus Torvalds committed
809
810
811

	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
	if (!sock)
812
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
813

814
	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
815
816
	err = kernel_accept(sock, &newsock, O_NONBLOCK);
	if (err < 0) {
Linus Torvalds's avatar
Linus Torvalds committed
817
818
819
		if (err == -ENOMEM)
			printk(KERN_WARNING "%s: no more sockets!\n",
			       serv->sv_name);
820
		else if (err != -EAGAIN && net_ratelimit())
Linus Torvalds's avatar
Linus Torvalds committed
821
822
			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
				   serv->sv_name, -err);
823
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
824
	}
825
	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
826

827
	err = kernel_getpeername(newsock, sin, &slen);
Linus Torvalds's avatar
Linus Torvalds committed
828
829
830
831
832
833
834
835
	if (err < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
				   serv->sv_name, -err);
		goto failed;		/* aborted connection or whatever */
	}

	/* Ideally, we would want to reject connections from unauthorized
836
837
	 * hosts here, but when we get encryption, the IP of the host won't
	 * tell us anything.  For now just warn about unpriv connections.
Linus Torvalds's avatar
Linus Torvalds committed
838
	 */
839
	if (!svc_port_is_privileged(sin)) {
Linus Torvalds's avatar
Linus Torvalds committed
840
		dprintk(KERN_WARNING
841
			"%s: connect from unprivileged port: %s\n",
842
			serv->sv_name,
843
			__svc_print_addr(sin, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
844
	}
845
	dprintk("%s: connect from %s\n", serv->sv_name,
846
		__svc_print_addr(sin, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
847
848
849
850
851
852

	/* make sure that a write doesn't block forever when
	 * low on memory
	 */
	newsock->sk->sk_sndtimeo = HZ*30;

853
854
	if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
Linus Torvalds's avatar
Linus Torvalds committed
855
		goto failed;
856
	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
857
858
859
860
861
	err = kernel_getsockname(newsock, sin, &slen);
	if (unlikely(err < 0)) {
		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
		slen = offsetof(struct sockaddr, sa_data);
	}
862
	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
863

864
865
866
867
868
869
870
871
872
873
	if (serv->sv_stats)
		serv->sv_stats->nettcpconn++;

	return &newsvsk->sk_xprt;

failed:
	sock_release(newsock);
	return NULL;
}

Linus Torvalds's avatar
Linus Torvalds committed
874
/*
875
876
877
878
 * Receive data.
 * If we haven't gotten the record length yet, get the next four bytes.
 * Otherwise try to gobble up as much as possible up to the complete
 * record length.
Linus Torvalds's avatar
Linus Torvalds committed
879
 */
880
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
881
{
882
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
883
	int len;
Linus Torvalds's avatar
Linus Torvalds committed
884

885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
		/* sndbuf needs to have room for one request
		 * per thread, otherwise we can stall even when the
		 * network isn't a bottleneck.
		 *
		 * We count all threads rather than threads in a
		 * particular pool, which provides an upper bound
		 * on the number of threads which will access the socket.
		 *
		 * rcvbuf just needs to be able to hold a few requests.
		 * Normally they will be removed from the queue
		 * as soon a a complete request arrives.
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
				    3 * serv->sv_max_mesg);

902
	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
903

904
905
	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
		int		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
Linus Torvalds's avatar
Linus Torvalds committed
906
907
908
909
910
911
912
913
914
		struct kvec	iov;

		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
		iov.iov_len  = want;
		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
			goto error;
		svsk->sk_tcplen += len;

		if (len < want) {
915
916
			dprintk("svc: short recvfrom while reading record "
				"length (%d of %d)\n", len, want);
917
			goto err_again; /* record header not complete */
Linus Torvalds's avatar
Linus Torvalds committed
918
919
920
		}

		svsk->sk_reclen = ntohl(svsk->sk_reclen);
921
		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
Linus Torvalds's avatar
Linus Torvalds committed
922
923
924
925
926
			/* FIXME: technically, a record can be fragmented,
			 *  and non-terminal fragments will not have the top
			 *  bit set in the fragment length header.
			 *  But apparently no known nfs clients send fragmented
			 *  records. */
927
			if (net_ratelimit())
928
929
				printk(KERN_NOTICE "RPC: multiple fragments "
					"per record not supported\n");
Linus Torvalds's avatar
Linus Torvalds committed
930
931
			goto err_delete;
		}
932

933
		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
Linus Torvalds's avatar
Linus Torvalds committed
934
		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
935
		if (svsk->sk_reclen > serv->sv_max_mesg) {
936
			if (net_ratelimit())
937
938
939
				printk(KERN_NOTICE "RPC: "
					"fragment too large: 0x%08lx\n",
					(unsigned long)svsk->sk_reclen);
Linus Torvalds's avatar
Linus Torvalds committed
940
941
942
943
944
945
946
947
948
949
950
951
			goto err_delete;
		}
	}

	/* Check whether enough data is available */
	len = svc_recv_available(svsk);
	if (len < 0)
		goto error;

	if (len < svsk->sk_reclen) {
		dprintk("svc: incomplete TCP record (%d of %d)\n",
			len, svsk->sk_reclen);
952
		goto err_again;	/* record not complete */
Linus Torvalds's avatar
Linus Torvalds committed
953
954
	}
	len = svsk->sk_reclen;
955
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
956

957
958
	return len;
 error:
959
	if (len == -EAGAIN)
960
961
962
963
964
965
966
967
		dprintk("RPC: TCP recv_record got EAGAIN\n");
	return len;
 err_delete:
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 err_again:
	return -EAGAIN;
}

968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
			       struct rpc_rqst **reqpp, struct kvec *vec)
{
	struct rpc_rqst *req = NULL;
	u32 *p;
	u32 xid;
	u32 calldir;
	int len;

	len = svc_recvfrom(rqstp, vec, 1, 8);
	if (len < 0)
		goto error;

	p = (u32 *)rqstp->rq_arg.head[0].iov_base;
	xid = *p++;
	calldir = *p;

	if (calldir == 0) {
		/* REQUEST is the most common case */
		vec[0] = rqstp->rq_arg.head[0];
	} else {
		/* REPLY */
990
991
992
993
		struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;

		if (bc_xprt)
			req = xprt_lookup_rqst(bc_xprt, xid);
994
995
996
997

		if (!req) {
			printk(KERN_NOTICE
				"%s: Got unrecognized reply: "
998
				"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
999
				__func__, ntohl(calldir),
1000
				bc_xprt, xid);
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
			vec[0] = rqstp->rq_arg.head[0];
			goto out;
		}

		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
		       sizeof(struct xdr_buf));
		/* copy the xid and call direction */
		memcpy(req->rq_private_buf.head[0].iov_base,
		       rqstp->rq_arg.head[0].iov_base, 8);
		vec[0] = req->rq_private_buf.head[0];
	}
 out:
	vec[0].iov_base += 8;
	vec[0].iov_len -= 8;
	len = svsk->sk_reclen - 8;
 error:
	*reqpp = req;
	return len;
}

1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
/*
 * Receive data from a TCP socket.
 */
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	int		len;
	struct kvec *vec;
	int pnum, vlen;
1032
	struct rpc_rqst *req = NULL;
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042

	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));

	len = svc_tcp_recv_record(svsk, rqstp);
	if (len < 0)
		goto error;

1043
	vec = rqstp->rq_vec;
Linus Torvalds's avatar
Linus Torvalds committed
1044
1045
	vec[0] = rqstp->rq_arg.head[0];
	vlen = PAGE_SIZE;
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061

	/*
	 * We have enough data for the whole tcp record. Let's try and read the
	 * first 8 bytes to get the xid and the call direction. We can use this
	 * to figure out if this is a call or a reply to a callback. If
	 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
	 * In that case, don't bother with the calldir and just read the data.
	 * It will be rejected in svc_process.
	 */
	if (len >= 8) {
		len = svc_process_calldir(svsk, rqstp, &req, vec);
		if (len < 0)
			goto err_again;
		vlen -= 8;
	}

Linus Torvalds's avatar
Linus Torvalds committed
1062
1063
	pnum = 1;
	while (vlen < len) {
1064
1065
1066
		vec[pnum].iov_base = (req) ?
			page_address(req->rq_private_buf.pages[pnum - 1]) :
			page_address(rqstp->rq_pages[pnum]);
Linus Torvalds's avatar
Linus Torvalds committed
1067
1068
1069
1070
		vec[pnum].iov_len = PAGE_SIZE;
		pnum++;
		vlen += PAGE_SIZE;
	}
1071
	rqstp->rq_respages = &rqstp->rq_pages[pnum];
Linus Torvalds's avatar
Linus Torvalds committed
1072
1073
1074
1075

	/* Now receive data */
	len = svc_recvfrom(rqstp, vec, pnum, len);
	if (len < 0)
1076
		goto err_again;
Linus Torvalds's avatar
Linus Torvalds committed
1077

1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
	/*
	 * Account for the 8 bytes we read earlier
	 */
	len += 8;

	if (req) {
		xprt_complete_rqst(req->rq_task, len);
		len = 0;
		goto out;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
	dprintk("svc: TCP complete record (%d bytes)\n", len);
	rqstp->rq_arg.len = len;
	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
	}

1098
	rqstp->rq_xprt_ctxt   = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1099
1100
	rqstp->rq_prot	      = IPPROTO_TCP;

1101
out:
Linus Torvalds's avatar
Linus Torvalds committed
1102
1103
1104
1105
	/* Reset TCP read info */
	svsk->sk_reclen = 0;
	svsk->sk_tcplen = 0;

1106
	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
Linus Torvalds's avatar
Linus Torvalds committed
1107
1108
1109
1110
1111
	if (serv->sv_stats)
		serv->sv_stats->nettcpcnt++;

	return len;

1112
err_again:
Linus Torvalds's avatar
Linus Torvalds committed
1113
1114
	if (len == -EAGAIN) {
		dprintk("RPC: TCP recvfrom got EAGAIN\n");
1115
1116
1117
1118
		return len;
	}
error:
	if (len != -EAGAIN) {
Linus Torvalds's avatar
Linus Torvalds committed
1119
		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1120
		       svsk->sk_xprt.xpt_server->sv_name, -len);
1121
		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
1122
	}
1123
	return -EAGAIN;
Linus Torvalds's avatar
Linus Torvalds committed
1124
1125
1126
1127
1128
}

/*
 * Send out data on TCP socket.
 */
1129
static int svc_tcp_sendto(struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
1130
1131
1132
{
	struct xdr_buf	*xbufp = &rqstp->rq_res;
	int sent;
1133
	__be32 reclen;
Linus Torvalds's avatar
Linus Torvalds committed
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143

	/* Set up the first element of the reply kvec.
	 * Any other kvecs that may be in use have been taken
	 * care of by the server implementation itself.
	 */
	reclen = htonl(0x80000000|((xbufp->len ) - 4));
	memcpy(xbufp->head[0].iov_base, &reclen, 4);

	sent = svc_sendto(rqstp, &rqstp->rq_res);
	if (sent != xbufp->len) {
1144
1145
1146
		printk(KERN_NOTICE
		       "rpc-srv/tcp: %s: %s %d when sending %d bytes "
		       "- shutting down socket\n",
1147
		       rqstp->rq_xprt->xpt_server->sv_name,
Linus Torvalds's avatar
Linus Torvalds committed
1148
1149
		       (sent<0)?"got error":"sent only",
		       sent, xbufp->len);
1150
		set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
1151
		svc_xprt_enqueue(rqstp->rq_xprt);
Linus Torvalds's avatar
Linus Torvalds committed
1152
1153
1154
1155
1156
		sent = -EAGAIN;
	}
	return sent;
}

Tom Tucker's avatar
Tom Tucker committed
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
/*
 * Setup response header. TCP has a 4B record length field.
 */
static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
	struct kvec *resv = &rqstp->rq_res.head[0];

	/* tcp needs a space for the record length... */
	svc_putnl(resv, 0);
}

1168
1169
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
1170
	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);