/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
104
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
105
#include <net/sock.h>
106
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
107
108
109
110
111
112
113
114
115
116
117
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

Adrian Bunk's avatar
Adrian Bunk committed
118
119
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
120
121
122
123
124
125
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

126
#ifdef CONFIG_SECURITY_NETWORK
/* Store the security ID carried by the scm cookie in the skb (UNIXSID). */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security ID stored in the skb (UNIXSID) into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are no-ops. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */

Linus Torvalds's avatar
Linus Torvalds committed
144
145
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

150
/*
 * Fold a checksum into a hash bucket index: xor the upper bits down onto
 * the lower ones, then mask with UNIX_HASH_SIZE - 1.
 */
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned folded = (__force unsigned)n;

	folded ^= folded >> 16;
	folded ^= folded >> 8;

	return folded & (UNIX_HASH_SIZE - 1);
}

/* The peer pointer of a unix socket; usable as an lvalue. */
#define unix_peer(sk) (unix_sk(sk)->peer)

/* Non-zero when osk's peer is sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}

/*
 * sk may send to osk when osk has no peer at all, or when osk's peer
 * is sk itself.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_our_peer(sk, osk);
}

170
171
172
173
174
/* Non-zero when the receive queue holds more skbs than sk_max_ack_backlog. */
static inline int unix_recvq_full(struct sock const *sk)
{
	unsigned int queued = skb_queue_len(&sk->sk_receive_queue);

	return queued > sk->sk_max_ack_backlog;
}

Linus Torvalds's avatar
Linus Torvalds committed
175
176
177
178
static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

179
	unix_state_lock(s);
Linus Torvalds's avatar
Linus Torvalds committed
180
181
182
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
183
	unix_state_unlock(s);
Linus Torvalds's avatar
Linus Torvalds committed
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
	return peer;
}

/* Drop one reference on addr and free it when that was the last one. */
static inline void unix_release_addr(struct unix_address *addr)
{
	if (!atomic_dec_and_test(&addr->refcnt))
		return;

	kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should be not zero length.
 *	        - if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 */
199

Eric Dumazet's avatar
Eric Dumazet committed
200
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
Linus Torvalds's avatar
Linus Torvalds committed
201
202
203
204
205
206
207
208
209
210
211
212
213
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesnt as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
214
		((char *)sunaddr)[len] = 0;
Linus Torvalds's avatar
Linus Torvalds committed
215
216
217
218
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

219
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
Linus Torvalds's avatar
Linus Torvalds committed
220
221
222
223
224
225
226
227
228
229
	return len;
}

/*
 * Unlink sk from whichever hash chain it is on.  Does no locking itself;
 * the callers in this file hold unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

/*
 * Add sk to the given hash chain, warning if it is still hashed
 * somewhere.  Does no locking itself; the callers in this file hold
 * unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
236
	spin_lock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
237
	__unix_remove_socket(sk);
238
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
239
240
241
242
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
243
	spin_lock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
244
	__unix_insert_socket(list, sk);
245
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
246
247
}

248
249
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
Linus Torvalds's avatar
Linus Torvalds committed
250
251
252
253
254
255
256
257
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

258
		if (!net_eq(sock_net(s), net))
259
260
			continue;

Linus Torvalds's avatar
Linus Torvalds committed
261
262
263
264
265
266
267
268
269
		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

270
271
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
Linus Torvalds's avatar
Linus Torvalds committed
272
273
274
275
276
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

277
	spin_lock(&unix_table_lock);
278
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
Linus Torvalds's avatar
Linus Torvalds committed
279
280
	if (s)
		sock_hold(s);
281
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
282
283
284
	return s;
}

285
static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
Linus Torvalds's avatar
Linus Torvalds committed
286
287
288
289
{
	struct sock *s;
	struct hlist_node *node;

290
	spin_lock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
291
292
293
294
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

295
		if (!net_eq(sock_net(s), net))
296
297
			continue;

Eric Dumazet's avatar
Eric Dumazet committed
298
		if (dentry && dentry->d_inode == i) {
Linus Torvalds's avatar
Linus Torvalds committed
299
300
301
302
303
304
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
305
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
306
307
308
309
310
311
312
313
314
315
316
317
318
	return s;
}

/*
 * The socket counts as writable while four times the write memory in
 * use does not exceed the send buffer size.
 */
static inline int unix_writable(struct sock *sk)
{
	int scaled = atomic_read(&sk->sk_wmem_alloc) << 2;

	return scaled <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
Ingo Molnar's avatar
Ingo Molnar committed
319
			wake_up_interruptible_sync(sk->sk_sleep);
320
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
Linus Torvalds's avatar
Linus Torvalds committed
321
322
323
324
325
326
327
328
329
330
	}
	read_unlock(&sk->sk_callback_lock);
}

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
331
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
Linus Torvalds's avatar
Linus Torvalds committed
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

352
353
354
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
Linus Torvalds's avatar
Linus Torvalds committed
355
	if (!sock_flag(sk, SOCK_DEAD)) {
356
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
Linus Torvalds's avatar
Linus Torvalds committed
357
358
359
360
361
362
363
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
364
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
Linus Torvalds's avatar
Linus Torvalds committed
365
#ifdef UNIX_REFCNT_DEBUG
Eric Dumazet's avatar
Eric Dumazet committed
366
367
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
Linus Torvalds's avatar
Linus Torvalds committed
368
369
370
#endif
}

Eric Dumazet's avatar
Eric Dumazet committed
371
static int unix_release_sock(struct sock *sk, int embrion)
Linus Torvalds's avatar
Linus Torvalds committed
372
373
374
375
376
377
378
379
380
381
382
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
383
	unix_state_lock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
384
385
386
387
388
389
390
391
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
392
	unix_state_unlock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
393
394
395

	wake_up_interruptible_all(&u->peer_wait);

396
	skpair = unix_peer(sk);
Linus Torvalds's avatar
Linus Torvalds committed
397

398
	if (skpair != NULL) {
Linus Torvalds's avatar
Linus Torvalds committed
399
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
400
			unix_state_lock(skpair);
Linus Torvalds's avatar
Linus Torvalds committed
401
402
403
404
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
405
			unix_state_unlock(skpair);
Linus Torvalds's avatar
Linus Torvalds committed
406
407
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
408
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
Linus Torvalds's avatar
Linus Torvalds committed
409
410
411
412
413
414
415
416
417
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
418
		if (state == TCP_LISTEN)
Linus Torvalds's avatar
Linus Torvalds committed
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

444
	if (unix_tot_inflight)
445
		unix_gc();		/* Garbage collect fds */
Linus Torvalds's avatar
Linus Torvalds committed
446
447
448
449
450
451
452
453
454
455
456

	return 0;
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
Eric Dumazet's avatar
Eric Dumazet committed
457
458
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
Linus Torvalds's avatar
Linus Torvalds committed
459
460
	err = -EINVAL;
	if (!u->addr)
Eric Dumazet's avatar
Eric Dumazet committed
461
		goto out;	/* No listens on an unbound socket */
462
	unix_state_lock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
463
464
465
466
467
468
469
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
470
	sk->sk_peercred.pid	= task_tgid_vnr(current);
Linus Torvalds's avatar
Linus Torvalds committed
471
472
473
474
475
	sk->sk_peercred.uid	= current->euid;
	sk->sk_peercred.gid	= current->egid;
	err = 0;

out_unlock:
476
	unix_state_unlock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
477
478
479
480
481
482
483
484
485
486
487
488
out:
	return err;
}

/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

506
static const struct proto_ops unix_stream_ops = {
Linus Torvalds's avatar
Linus Torvalds committed
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

527
static const struct proto_ops unix_dgram_ops = {
Linus Torvalds's avatar
Linus Torvalds committed
528
529
530
531
532
533
534
535
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
536
	.poll =		unix_dgram_poll,
Linus Torvalds's avatar
Linus Torvalds committed
537
538
539
540
541
542
543
544
545
546
547
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

548
static const struct proto_ops unix_seqpacket_ops = {
Linus Torvalds's avatar
Linus Torvalds committed
549
550
551
552
553
554
555
556
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
557
	.poll =		unix_dgram_poll,
Linus Torvalds's avatar
Linus Torvalds committed
558
559
560
561
562
563
564
565
566
567
568
569
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
570
571
572
573
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.sockets_allocated	= &unix_nr_socks,
	.obj_size		= sizeof(struct unix_sock),
Linus Torvalds's avatar
Linus Torvalds committed
574
575
};

576
577
578
579
580
581
582
583
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (done with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

Eric Dumazet's avatar
Eric Dumazet committed
584
static struct sock *unix_create1(struct net *net, struct socket *sock)
Linus Torvalds's avatar
Linus Torvalds committed
585
586
587
588
{
	struct sock *sk = NULL;
	struct unix_sock *u;

589
590
	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
Linus Torvalds's avatar
Linus Torvalds committed
591
592
		goto out;

593
	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
Linus Torvalds's avatar
Linus Torvalds committed
594
595
596
	if (!sk)
		goto out;

Eric Dumazet's avatar
Eric Dumazet committed
597
	sock_init_data(sock, sk);
598
599
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);
Linus Torvalds's avatar
Linus Torvalds committed
600
601

	sk->sk_write_space	= unix_write_space;
602
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
Linus Torvalds's avatar
Linus Torvalds committed
603
604
605
606
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
607
	spin_lock_init(&u->lock);
Al Viro's avatar
Al Viro committed
608
	atomic_long_set(&u->inflight, 0);
609
	INIT_LIST_HEAD(&u->link);
Ingo Molnar's avatar
Ingo Molnar committed
610
	mutex_init(&u->readlock); /* single task reading lock */
Linus Torvalds's avatar
Linus Torvalds committed
611
612
613
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
614
615
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
616
617
618
	else
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

Linus Torvalds's avatar
Linus Torvalds committed
619
620
621
	return sk;
}

622
static int unix_create(struct net *net, struct socket *sock, int protocol)
Linus Torvalds's avatar
Linus Torvalds committed
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
638
		sock->type = SOCK_DGRAM;
Linus Torvalds's avatar
Linus Torvalds committed
639
640
641
642
643
644
645
646
647
648
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

649
	return unix_create1(net, sock) ? 0 : -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
650
651
652
653
654
655
656
657
658
659
660
}

/*
 * release() socket operation: detach the sock from the socket and tear
 * it down.  A socket without a sock is a no-op.  Returns 0, or whatever
 * unix_release_sock() returns.
 */
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk == NULL)
		return 0;

	sock->sk = NULL;
	return unix_release_sock(sk, 0);
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
667
	struct net *net = sock_net(sk);
Linus Torvalds's avatar
Linus Torvalds committed
668
669
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
Eric Dumazet's avatar
Eric Dumazet committed
670
	struct unix_address *addr;
Linus Torvalds's avatar
Linus Torvalds committed
671
672
	int err;

Ingo Molnar's avatar
Ingo Molnar committed
673
	mutex_lock(&u->readlock);
Linus Torvalds's avatar
Linus Torvalds committed
674
675
676
677
678
679

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
680
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
Linus Torvalds's avatar
Linus Torvalds committed
681
682
683
684
685
686
687
688
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
689
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
Linus Torvalds's avatar
Linus Torvalds committed
690

691
	spin_lock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
692
693
	ordernum = (ordernum+1)&0xFFFFF;

694
	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
Linus Torvalds's avatar
Linus Torvalds committed
695
				      addr->hash)) {
696
		spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
697
698
699
700
701
702
703
704
705
706
		/* Sanity yield. It is unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
707
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
708
709
	err = 0;

Ingo Molnar's avatar
Ingo Molnar committed
710
out:	mutex_unlock(&u->readlock);
Linus Torvalds's avatar
Linus Torvalds committed
711
712
713
	return err;
}

714
715
/*
 * Look up the socket a name refers to.
 *
 * Abstract names (first path byte zero) are resolved through the name
 * hash.  Filesystem names are resolved through the VFS: the path must
 * be writable by the caller and must be a socket inode that a unix
 * socket of the requested type is bound to.
 *
 * Returns the socket with a reference held.  On failure returns NULL
 * and stores the error in *error: the kern_path()/inode_permission()
 * result, -ECONNREFUSED when nothing is bound to the name, or
 * -EPROTOTYPE when the bound socket has a different type.  *error is
 * left untouched on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct inode *inode;
	struct path path;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *bound;

		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		bound = unix_sk(found)->dentry;
		if (bound)
			touch_atime(unix_sk(found)->mnt, bound);
		return found;
	}

	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (!found)
		goto put_fail;

	if (found->sk_type != type) {
		/* Wrong kind of socket: drop the path and the socket. */
		path_put(&path);
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(path.mnt, path.dentry);
	path_put(&path);
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
773
	struct net *net = sock_net(sk);
Linus Torvalds's avatar
Linus Torvalds committed
774
	struct unix_sock *u = unix_sk(sk);
775
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
Eric Dumazet's avatar
Eric Dumazet committed
776
	struct dentry *dentry = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
777
778
779
780
781
782
783
784
785
786
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

787
	if (addr_len == sizeof(short)) {
Linus Torvalds's avatar
Linus Torvalds committed
788
789
790
791
792
793
794
795
796
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

Ingo Molnar's avatar
Ingo Molnar committed
797
	mutex_lock(&u->readlock);
Linus Torvalds's avatar
Linus Torvalds committed
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;
823
824

		dentry = lookup_create(&nd, 0);
Linus Torvalds's avatar
Linus Torvalds committed
825
826
827
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;
828

Linus Torvalds's avatar
Linus Torvalds committed
829
830
831
832
833
		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
834
835
836
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
837
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
838
		mnt_drop_write(nd.path.mnt);
Linus Torvalds's avatar
Linus Torvalds committed
839
840
		if (err)
			goto out_mknod_dput;
841
842
843
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;
Linus Torvalds's avatar
Linus Torvalds committed
844
845
846
847

		addr->hash = UNIX_HASH_SIZE;
	}

848
	spin_lock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
849
850
851

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
852
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
Linus Torvalds's avatar
Linus Torvalds committed
853
854
855
856
857
858
859
860
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
861
862
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
Linus Torvalds's avatar
Linus Torvalds committed
863
864
865
866
867
868
869
870
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
871
	spin_unlock(&unix_table_lock);
Linus Torvalds's avatar
Linus Torvalds committed
872
out_up:
Ingo Molnar's avatar
Ingo Molnar committed
873
	mutex_unlock(&u->readlock);
Linus Torvalds's avatar
Linus Torvalds committed
874
875
876
877
878
879
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
880
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
Jan Blunck's avatar
Jan Blunck committed
881
	path_put(&nd.path);
Linus Torvalds's avatar
Linus Torvalds committed
882
out_mknod_parent:
883
884
	if (err == -EEXIST)
		err = -EADDRINUSE;
Linus Torvalds's avatar
Linus Torvalds committed
885
886
887
888
	unix_release_addr(addr);
	goto out_up;
}

889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
/*
 * Take the state lock of one or two sockets.
 *
 * If sk2 is NULL or is the very same socket as sk1, only sk1 is locked.
 * Otherwise both are locked, the one at the lower address first, so two
 * callers handing in the same pair in opposite order still acquire the
 * locks in the same sequence.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first = sk1;
	struct sock *second = sk2;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	/* sk1 != sk2 here, so this is the exact complement of sk1 < sk2. */
	if (sk1 > sk2) {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}

/*
 * Release what unix_state_double_lock() took: sk1 is always unlocked,
 * and sk2 is unlocked afterwards only when it is a distinct, non-NULL
 * socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (!sk2 || unlikely(sk1 == sk2))
		return;

	unix_state_unlock(sk2);
}

Linus Torvalds's avatar
Linus Torvalds committed
914
915
916
917
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
918
	struct net *net = sock_net(sk);
919
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
Linus Torvalds's avatar
Linus Torvalds committed
920
921
922
923
924
925
926
927
928
929
930
931
932
933
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

934
restart:
935
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
Linus Torvalds's avatar
Linus Torvalds committed
936
937
938
		if (!other)
			goto out;

939
940
941
942
943
944
945
946
		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}
Linus Torvalds's avatar
Linus Torvalds committed
947
948
949
950
951
952
953
954
955
956
957
958
959
960

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
961
		unix_state_double_lock(sk, other);
Linus Torvalds's avatar
Linus Torvalds committed
962
963
964
965
966
967
968
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
969
		unix_peer(sk) = other;
970
		unix_state_double_unlock(sk, other);
Linus Torvalds's avatar
Linus Torvalds committed
971
972
973
974
975

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
976
		unix_peer(sk) = other;
977
		unix_state_double_unlock(sk, other);
Linus Torvalds's avatar
Linus Torvalds committed
978
	}
979
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
980
981

out_unlock:
982
	unix_state_double_unlock(sk, other);
Linus Torvalds's avatar
Linus Torvalds committed
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
998
		unix_recvq_full(other);
Linus Torvalds's avatar
Linus Torvalds committed
999

1000
	unix_state_unlock(other);
Linus Torvalds's avatar
Linus Torvalds committed
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

/*
 * connect() for stream/seqpacket sockets.
 *
 * Looks up the listening socket named by @uaddr, creates the server-side
 * socket (newsk) for the connection, cross-links it with @sock's socket
 * and queues a one-byte skb owned by newsk on the listener's receive
 * queue for accept() to pick up.
 *
 * Returns 0 on success or a negative errno:
 *   -ECONNREFUSED  target is not listening, or has shut down receiving
 *   -EAGAIN        listener backlog full and the send timeout is zero
 *   -EISCONN       @sock is already TCP_ESTABLISHED
 *   -EINVAL        @sock is in any other state than TCP_CLOSE
 *   -ENOMEM        newsk or the skb could not be allocated
 * plus whatever unix_mkname(), unix_autobind(), unix_find_other(),
 * sock_intr_errno() or security_unix_stream_connect() report.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	/*
	 * A listener that has shut down its receive side will never drain
	 * its backlog.  unix_wait_for_peer() refuses to sleep on such a
	 * socket, so without this check a full backlog would make us spin
	 * through "restart" forever instead of blocking.  Refuse instead.
	 */
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's state lock before returning. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* Our state changed between the unlocked read and the lock: redo. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
1187
	struct sock *ska = socka->sk, *skb = sockb->sk;
Linus Torvalds's avatar
Linus Torvalds committed
1188
1189
1190
1191

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
1192
1193
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
1194
	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
Linus Torvalds's avatar
Linus Torvalds committed
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
Eric Dumazet's avatar
Eric Dumazet committed
1215
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
Linus Torvalds's avatar
Linus Torvalds committed
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
1239
	unix_state_lock(tsk);
Linus Torvalds's avatar
Linus Torvalds committed
1240
1241
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
1242
	unix_state_unlock(tsk);
Linus Torvalds's avatar
Linus Torvalds committed
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
1254
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
Linus Torvalds's avatar
Linus Torvalds committed
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
1269
	unix_state_lock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
1280
	unix_state_unlock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
	sock_put(sk);
out:
	return err;
}

/*
 * Move the passed-file list from @skb into @scm.
 *
 * Afterwards the skb no longer references the list, its destructor is
 * reset to plain sock_wfree, and unix_notinflight() has been called for
 * every file in the list, last entry first.
 */
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int idx;

	scm->fp = UNIXCB(skb).fp;
	skb->destructor = sock_wfree;
	UNIXCB(skb).fp = NULL;

	idx = scm->fp->count;
	while (idx-- > 0)
		unix_notinflight(scm->fp->fp[idx]);
}

static void unix_destruct_fds(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

1310
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
1311
1312
{
	int i;
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

Eric Dumazet's avatar
Eric Dumazet committed
1323
	for (i = scm->fp->count-1; i >= 0; i--)
Linus Torvalds's avatar
Linus Torvalds committed
1324
1325
		unix_inflight(scm->fp->fp[i]);
	skb->destructor = unix_destruct_fds;
1326
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
}

/*
 *	Send AF_UNIX data.
 */

/*
 * sendmsg() for datagram (and, via sk_type, seqpacket) sockets.
 *
 * Builds one skb holding the sender's credentials, any passed files and
 * @len bytes of payload, then queues it on the receive queue of either
 * the address in msg->msg_name or, if none was given, the connected
 * peer.
 *
 * Returns @len on success or a negative errno:
 *   -EOPNOTSUPP    MSG_OOB requested
 *   -ENOTCONN      no address given and no connected peer
 *   -EMSGSIZE      @len exceeds sk_sndbuf - 32
 *   -ECONNRESET    connected peer went away and no address to retry with
 *   -ECONNREFUSED  the dead destination was our connected peer
 *   -EPERM         unix_may_send() refused the destination
 *   -EPIPE         destination has shut down receiving
 *   -EAGAIN        destination queue full and the send timeout is zero
 * plus errors from scm_send(), unix_mkname(), unix_autobind(),
 * sock_alloc_send_skb(), unix_attach_fds(), memcpy_fromiovec(),
 * unix_find_other(), security_unix_may_send() and sock_intr_errno().
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *peer = NULL;
	struct sk_buff *skb;
	struct scm_cookie local_scm;
	int name_len = 0; /* fake GCC */
	unsigned hash;
	long timeo;
	int err;

	if (!siocb->scm)
		siocb->scm = &local_scm;

	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (!msg->msg_namelen) {
		/* No destination supplied: fall back to the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		peer = unix_peer_get(sk);
		if (!peer)
			goto out;
	} else {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		name_len = err;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		goto out;

	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!peer) {
		err = -ECONNRESET;
		if (!sunaddr)
			goto out_free;

		peer = unix_find_other(net, sunaddr, name_len, sk->sk_type,
				       hash, &err);
		if (!peer)
			goto out_free;
	}

	unix_state_lock(peer);

	err = -EPERM;
	if (!unix_may_send(sk, peer))
		goto out_unlock;

	if (sock_flag(peer, SOCK_DEAD)) {
		int was_our_peer;

		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(peer);
		sock_put(peer);

		/* Drop a stale connected-peer link, under our own lock. */
		unix_state_lock(sk);
		was_our_peer = (unix_peer(sk) == peer);
		if (was_our_peer)
			unix_peer(sk) = NULL;
		unix_state_unlock(sk);

		if (was_our_peer) {
			unix_dgram_disconnected(sk, peer);
			/* Second put: the reference unix_peer(sk) held. */
			sock_put(peer);
			peer = NULL;
			err = -ECONNREFUSED;
			goto out_free;
		}

		/* Not our peer: look the address up afresh. */
		peer = NULL;
		goto restart;
	}

	err = -EPIPE;
	if (peer->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, peer->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* A full queue only blocks us if the receiver is not connected to us. */
	if (unix_peer(peer) != sk && unix_recvq_full(peer)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* Drops peer's state lock before returning. */
		timeo = unix_wait_for_peer(peer, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&peer->sk_receive_queue, skb);
	unix_state_unlock(peer);
	peer->sk_data_ready(peer, len);
	sock_put(peer);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(peer);
out_free:
	kfree_skb(skb);
out:
	if (peer)
		sock_put(peer);
	scm_destroy(siocb->scm);
	return err;
}

1486

Linus Torvalds's avatar
Linus Torvalds committed
1487
1488
1489
1490
1491