sem.c 46.4 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
9
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
Linus Torvalds's avatar
Linus Torvalds committed
10
 * Enforced range limit on SEM_UNDO
Alan Cox's avatar
Alan Cox committed
11
 * (c) 2001 Red Hat Inc
Linus Torvalds's avatar
Linus Torvalds committed
12
13
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
14
15
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
Steve Grubb's avatar
Steve Grubb committed
16
17
18
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
Kirill Korotaev's avatar
Kirill Korotaev committed
19
20
21
22
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semncnt() and
 *   count_semzcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare(),
 *   wake_up_sem_queue_do())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - The synchronizations between wake-ups due to a timeout/signal and a
 *   wake-up due to a completed semaphore operation is achieved by using an
 *   intermediate state (IN_WAKEUP).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
Linus Torvalds's avatar
Linus Torvalds committed
74
75
76
77
78
79
80
81
82
83
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
84
#include <linux/capability.h>
85
#include <linux/seq_file.h>
Nadia Derbey's avatar
Nadia Derbey committed
86
#include <linux/rwsem.h>
Kirill Korotaev's avatar
Kirill Korotaev committed
87
#include <linux/nsproxy.h>
88
#include <linux/ipc_namespace.h>
Ingo Molnar's avatar
Ingo Molnar committed
89

Linus Torvalds's avatar
Linus Torvalds committed
90
91
92
#include <asm/uaccess.h>
#include "util.h"

93
94
95
96
/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	int	sempid;		/* pid of last operation */
97
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	struct list_head sem_pending; /* pending single-sop operations */
};

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations: links
					  * the entry into either the per-array
					  * or a per-semaphore sem_pending list,
					  * later into the wake-up list */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process;
					  * reused to carry the result code
					  * between wake_up_sem_queue_prepare()
					  * and wake_up_sem_queue_do() */
	int			status;	 /* completion status of operation;
					  * IN_WAKEUP while a wake-up is
					  * in flight */
	struct sembuf		*sops;	 /* array of pending operations */
	int			nsops;	 /* number of operations */
	int			alter;	 /* does *sops alter the array? */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo,
						 * used by kfree_rcu() */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier;
						 * set to -1 by freeary() when
						 * the set is removed */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	atomic_t		refcnt;		/* reference count; presumably one
						 * per task sharing the list -
						 * TODO confirm against users */
	spinlock_t		lock;		/* held while list_proc entries
						 * are unlinked (see freeary()) */
	struct list_head	list_proc;	/* head of the sem_undo.list_proc
						 * chain */
};


139
#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
Kirill Korotaev's avatar
Kirill Korotaev committed
140

Nadia Derbey's avatar
Nadia Derbey committed
141
#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
Linus Torvalds's avatar
Linus Torvalds committed
142

Nadia Derbey's avatar
Nadia Derbey committed
143
static int newary(struct ipc_namespace *, struct ipc_params *);
144
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
Linus Torvalds's avatar
Linus Torvalds committed
145
#ifdef CONFIG_PROC_FS
146
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
Linus Torvalds's avatar
Linus Torvalds committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 *	
 */

Kirill Korotaev's avatar
Kirill Korotaev committed
161
162
163
164
165
#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

166
void sem_init_ns(struct ipc_namespace *ns)
Kirill Korotaev's avatar
Kirill Korotaev committed
167
168
169
170
171
172
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
173
	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
Kirill Korotaev's avatar
Kirill Korotaev committed
174
175
}

176
#ifdef CONFIG_IPC_NS
Kirill Korotaev's avatar
Kirill Korotaev committed
177
178
void sem_exit_ns(struct ipc_namespace *ns)
{
179
	free_ipcs(ns, &sem_ids(ns), freeary);
180
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
Kirill Korotaev's avatar
Kirill Korotaev committed
181
}
182
#endif
Linus Torvalds's avatar
Linus Torvalds committed
183
184
185

/*
 * Boot-time initialisation: set up the initial IPC namespace and register
 * the /proc "sysvipc/sem" interface together with its column header.
 */
void __init sem_init (void)
{
	sem_init_ns(&init_ipc_ns);
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
}

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 *
 * Carefully guard against sma->complex_count changing between zero
 * and non-zero while we are spinning for the lock. The value of
 * sma->complex_count cannot change while we are holding the lock,
 * so sem_unlock should be fine.
 *
 * The global lock path checks that all the local locks have been released,
 * checking each local lock once. This means that the local lock paths
 * cannot start their critical sections while the global lock is held.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
	/*
	 * Return value: index of the semaphore whose lock was taken, or -1
	 * when the array-wide lock (sma->sem_perm.lock) was taken. The value
	 * must be handed back to sem_unlock().
	 */
	int locknum;
 again:
	/*
	 * Callers that want the array-wide lock unconditionally pass an
	 * nsops value other than 1 (e.g. sem_lock_and_putref() passes
	 * sops == NULL, nsops == -1); sops is only dereferenced on the
	 * nsops == 1 path.
	 */
	if (nsops == 1 && !sma->complex_count) {
		struct sem *sem = sma->sem_base + sops->sem_num;

		/* Lock just the semaphore we are interested in. */
		spin_lock(&sem->lock);

		/*
		 * If sma->complex_count was set while we were spinning,
		 * we may need to look at things we did not lock here.
		 */
		if (unlikely(sma->complex_count)) {
			spin_unlock(&sem->lock);
			goto lock_array;
		}

		/*
		 * Another process is holding the global lock on the
		 * sem_array; we cannot enter our critical section,
		 * but have to wait for the global lock to be released.
		 */
		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
			spin_unlock(&sem->lock);
			spin_unlock_wait(&sma->sem_perm.lock);
			/* re-evaluate everything, complex_count may differ now */
			goto again;
		}

		locknum = sops->sem_num;
	} else {
		int i;
		/*
		 * Lock the semaphore array, and wait for all of the
		 * individual semaphore locks to go away.  The code
		 * above ensures no new single-lock holders will enter
		 * their critical section while the array lock is held.
		 */
 lock_array:
		spin_lock(&sma->sem_perm.lock);
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *sem = sma->sem_base + i;
			/* waits only; the per-semaphore lock is not acquired */
			spin_unlock_wait(&sem->lock);
		}
		locknum = -1;
	}
	return locknum;
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == -1) {
		spin_unlock(&sma->sem_perm.lock);
	} else {
		struct sem *sem = sma->sem_base + locknum;
		spin_unlock(&sem->lock);
	}
	rcu_read_unlock();
}

Nadia Derbey's avatar
Nadia Derbey committed
270
271
272
273
/*
 * sem_lock_(check_) routines are called in the paths where the rw_mutex
 * is not held.
 */
274
275
static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
			int id, struct sembuf *sops, int nsops, int *locknum)
276
{
277
278
	struct kern_ipc_perm *ipcp;
	struct sem_array *sma;
Nadia Derbey's avatar
Nadia Derbey committed
279

280
281
282
283
284
285
	rcu_read_lock();
	ipcp = ipc_obtain_object(&sem_ids(ns), id);
	if (IS_ERR(ipcp)) {
		sma = ERR_CAST(ipcp);
		goto err;
	}
286

287
288
	sma = container_of(ipcp, struct sem_array, sem_perm);
	*locknum = sem_lock(sma, sops, nsops);
289
290
291
292
293
294
295

	/* ipc_rmid() may have already freed the ID while sem_lock
	 * was spinning: verify that the structure is still valid
	 */
	if (!ipcp->deleted)
		return container_of(ipcp, struct sem_array, sem_perm);

296
	sem_unlock(sma, *locknum);
297
298
299
300
	sma = ERR_PTR(-EINVAL);
err:
	rcu_read_unlock();
	return sma;
301
302
}

303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
/*
 * Translate @id into its semaphore array using ipc_obtain_object().
 * Returns the array, or the ERR_PTR produced by the lookup. No lock is
 * taken here.
 */
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *perm;

	perm = ipc_obtain_object(&sem_ids(ns), id);
	if (!IS_ERR(perm))
		return container_of(perm, struct sem_array, sem_perm);

	return ERR_CAST(perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);
320

Nadia Derbey's avatar
Nadia Derbey committed
321
	return container_of(ipcp, struct sem_array, sem_perm);
322
323
}

324
325
/*
 * Enter an RCU read-side section, take the array-wide lock of @sma and
 * drop one reference via ipc_rcu_putref(). The caller must release the
 * lock with sem_unlock(sma, -1).
 */
static inline void sem_lock_and_putref(struct sem_array *sma)
{
	rcu_read_lock();
	/* nsops == -1 forces the array-wide lock; sops is not dereferenced */
	sem_lock(sma, NULL, -1);
	ipc_rcu_putref(sma);
}

/*
 * Drop one reference on @sma: the array-wide lock is taken around the
 * ipc_rcu_putref() call and released again before returning.
 */
static inline void sem_putref(struct sem_array *sma)
{
	sem_lock_and_putref(sma);
	sem_unlock(sma, -1);
}

Nadia Derbey's avatar
Nadia Derbey committed
337
338
339
340
341
/* Remove semaphore set @s from the semaphore ID table of namespace @ns. */
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

Linus Torvalds's avatar
Linus Torvalds committed
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/*
 * Lockless wakeup algorithm:
 * Without the check/retry algorithm a lockless wakeup is possible:
 * - queue.status is initialized to -EINTR before blocking.
 * - wakeup is performed by
 *	* unlinking the queue entry from sma->sem_pending
 *	* setting queue.status to IN_WAKEUP
 *	  This is the notification for the blocked thread that a
 *	  result value is imminent.
 *	* call wake_up_process
 *	* set queue.status to the final value.
 * - the previously blocked thread checks queue.status:
 *	* if it's IN_WAKEUP, then it must wait until the value changes
 *	* if it's not -EINTR, then the operation was completed by
 *	  update_queue. semtimedop can return queue.status without
 *	  performing any operation on the sem array.
 *	* otherwise it must acquire the spinlock and check what's up.
 *
 * The two-stage algorithm is necessary to protect against the following
 * races:
 * - if queue.status is set after wake_up_process, then the woken up idle
 *   thread could race forward and try (and fail) to acquire sma->lock
 *   before update_queue had a chance to set queue.status
 * - if queue.status is written before wake_up_process and if the
 *   blocked process is woken up by a signal between writing
 *   queue.status and the wake_up_process, then the woken up
 *   process could return from semtimedop and die by calling
 *   sys_exit before wake_up_process is called. Then wake_up_process
 *   will oops, because the task structure is already invalid.
 *   (yes, this happened on s390 with sysv msg).
 *
 */
#define IN_WAKEUP	1

Nadia Derbey's avatar
Nadia Derbey committed
376
377
378
379
380
/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rw_mutex held (as a writer)
 */

Nadia Derbey's avatar
Nadia Derbey committed
384
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
Linus Torvalds's avatar
Linus Torvalds committed
385
386
387
388
389
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;
Nadia Derbey's avatar
Nadia Derbey committed
390
391
392
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
393
	int i;
Linus Torvalds's avatar
Linus Torvalds committed
394
395
396

	if (!nsems)
		return -EINVAL;
Kirill Korotaev's avatar
Kirill Korotaev committed
397
	if (ns->used_sems + nsems > ns->sc_semmns)
Linus Torvalds's avatar
Linus Torvalds committed
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
		return -ENOSPC;

	size = sizeof (*sma) + nsems * sizeof (struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma) {
		return -ENOMEM;
	}
	memset (sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		ipc_rcu_putref(sma);
		return retval;
	}

Kirill Korotaev's avatar
Kirill Korotaev committed
417
	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
418
	if (id < 0) {
Linus Torvalds's avatar
Linus Torvalds committed
419
420
		security_sem_free(sma);
		ipc_rcu_putref(sma);
421
		return id;
Linus Torvalds's avatar
Linus Torvalds committed
422
	}
Kirill Korotaev's avatar
Kirill Korotaev committed
423
	ns->used_sems += nsems;
Linus Torvalds's avatar
Linus Torvalds committed
424
425

	sma->sem_base = (struct sem *) &sma[1];
426

427
	for (i = 0; i < nsems; i++) {
428
		INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
429
430
		spin_lock_init(&sma->sem_base[i].lock);
	}
431
432

	sma->complex_count = 0;
433
	INIT_LIST_HEAD(&sma->sem_pending);
434
	INIT_LIST_HEAD(&sma->list_id);
Linus Torvalds's avatar
Linus Torvalds committed
435
436
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
437
	sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
438

Nadia Derbey's avatar
Nadia Derbey committed
439
	return sma->sem_perm.id;
Linus Torvalds's avatar
Linus Torvalds committed
440
441
}

Nadia Derbey's avatar
Nadia Derbey committed
442

Nadia Derbey's avatar
Nadia Derbey committed
443
/*
Nadia Derbey's avatar
Nadia Derbey committed
444
 * Called with sem_ids.rw_mutex and ipcp locked.
Nadia Derbey's avatar
Nadia Derbey committed
445
 */
Nadia Derbey's avatar
Nadia Derbey committed
446
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
Nadia Derbey's avatar
Nadia Derbey committed
447
{
Nadia Derbey's avatar
Nadia Derbey committed
448
449
450
451
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	return security_sem_associate(sma, semflg);
Nadia Derbey's avatar
Nadia Derbey committed
452
453
}

Nadia Derbey's avatar
Nadia Derbey committed
454
/*
Nadia Derbey's avatar
Nadia Derbey committed
455
 * Called with sem_ids.rw_mutex and ipcp locked.
Nadia Derbey's avatar
Nadia Derbey committed
456
 */
Nadia Derbey's avatar
Nadia Derbey committed
457
458
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
Nadia Derbey's avatar
Nadia Derbey committed
459
{
Nadia Derbey's avatar
Nadia Derbey committed
460
461
462
463
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
Nadia Derbey's avatar
Nadia Derbey committed
464
465
466
467
468
		return -EINVAL;

	return 0;
}

469
/*
 * semget system call: validate @nsems against the namespace's SEMMSL limit
 * and hand the request to the generic ipcget() with the semaphore-specific
 * callbacks (newary, sem_security, sem_more_checks).
 * Returns -EINVAL for an out-of-range @nsems, otherwise whatever ipcget()
 * returns.
 */
SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops sem_ops;
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_ops.getnew = newary;
	sem_ops.associate = sem_security;
	sem_ops.more_checks = sem_more_checks;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
 */

static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
			     int nsops, struct sem_undo *un, int pid)
{
	int result, sem_op;
	struct sembuf *sop;
	struct sem * curr;

	/*
	 * Pass 1: apply the operations one by one directly to the array.
	 * On the first operation that cannot proceed, jump out with sop
	 * pointing at it; everything applied before it is rolled back at
	 * the "undo" label.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
		curr = sma->sem_base + sop->sem_num;
		sem_op = sop->sem_op;
		result = curr->semval;
  
		/* wait-for-zero on a non-zero semaphore */
		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
		/* un is only dereferenced for operations that carry SEM_UNDO */
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/*
	 		 *	Exceeding the undo range is an error.
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
		}
		curr->semval = result;
	}

	/*
	 * Pass 2 (all operations succeeded): walk backwards, record the pid
	 * of the last operation and commit the undo adjustments.
	 */
	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].sempid = pid;
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] -= sop->sem_op;
		sop--;
	}
	
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	/* the flags of the blocking operation decide: fail or sleep */
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	/* revert the semval changes of the operations before the failing one */
	sop--;
	while (sop >= sops) {
		sma->sem_base[sop->sem_num].semval -= sop->sem_op;
		sop--;
	}

	return result;
}

557
558
559
560
561
/** wake_up_sem_queue_prepare(pt, q, error): Prepare wake-up
 * @pt: list of queue entries to be woken up later; @q is appended to it
 * @q: queue entry that must be signaled
 * @error: Error value for the signal
 *
 * Prepare the wake-up of the queue entry q.
 */
563
564
static void wake_up_sem_queue_prepare(struct list_head *pt,
				struct sem_queue *q, int error)
Nick Piggin's avatar
Nick Piggin committed
565
{
566
567
568
569
570
571
572
	if (list_empty(pt)) {
		/*
		 * Hold preempt off so that we don't get preempted and have the
		 * wakee busy-wait until we're scheduled back on.
		 */
		preempt_disable();
	}
Nick Piggin's avatar
Nick Piggin committed
573
	q->status = IN_WAKEUP;
574
575
	q->pid = error;

576
	list_add_tail(&q->list, pt);
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
}

/**
 * wake_up_sem_queue_do(pt) - do the actual wake-up
 * @pt: list of tasks to be woken up
 *
 * Do the actual wake-up.
 * The function is called without any locks held, thus the semaphore array
 * could be destroyed already and the tasks can disappear as soon as the
 * status is set to the actual return code.
 */
static void wake_up_sem_queue_do(struct list_head *pt)
{
	struct sem_queue *q, *t;
	int did_something;

	did_something = !list_empty(pt);
594
	list_for_each_entry_safe(q, t, pt, list) {
595
596
597
598
599
600
601
		wake_up_process(q->sleeper);
		/* q can disappear immediately after writing q->status. */
		smp_wmb();
		q->status = q->pid;
	}
	if (did_something)
		preempt_enable();
Nick Piggin's avatar
Nick Piggin committed
602
603
}

604
605
606
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
607
	if (q->nsops > 1)
608
609
610
		sma->complex_count--;
}

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * was completed.
 */
static int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	struct sem *curr;
	struct sem_queue *h;

	/* if the operation didn't modify the array, then no restart */
	if (q->alter == 0)
		return 0;

	/* pending complex operations are too difficult to analyse */
	if (sma->complex_count)
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	curr = sma->sem_base + q->sops[0].sem_num;

	/* No-one waits on this queue */
	if (list_empty(&curr->sem_pending))
		return 0;

	/* the new semaphore value */
	if (curr->semval) {
		/* It is impossible that someone waits for the new value:
		 * - q is a previously sleeping simple operation that
		 *   altered the array. It must be a decrement, because
		 *   simple increments never sleep.
		 * - The value is not 0, thus wait-for-zero won't proceed.
		 * - If there are older (higher priority) decrements
		 *   in the queue, then they have observed the original
		 *   semval value and couldn't proceed. The operation
		 *   decremented to value - thus they won't proceed either.
		 */
		BUG_ON(q->sops[0].sem_op >= 0);
		return 0;
	}
	/*
	 * semval is 0. Check if there are wait-for-zero semops.
660
	 * They must be the first entries in the per-semaphore queue
661
	 */
662
	h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
663
664
665
666
667
668
669
670
671
672
673
	BUG_ON(h->nsops != 1);
	BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);

	/* Yes, there is a wait-for-zero semop. Restart */
	if (h->sops[0].sem_op == 0)
		return 1;

	/* Again - no-one is waiting for the new value. */
	return 0;
}

674
675
676
677
678

/**
 * update_queue(sma, semnum, pt): Look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @pt: list head for the tasks that must be woken up.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @pt. The return code
 * is stored in q->pid.
 * The function returns 1 if at least one semop was completed successfully.
 */
689
static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
Linus Torvalds's avatar
Linus Torvalds committed
690
{
691
692
693
	struct sem_queue *q;
	struct list_head *walk;
	struct list_head *pending_list;
694
	int semop_completed = 0;
695

696
	if (semnum == -1)
697
		pending_list = &sma->sem_pending;
698
	else
699
		pending_list = &sma->sem_base[semnum].sem_pending;
700
701

again:
702
703
	walk = pending_list->next;
	while (walk != pending_list) {
704
		int error, restart;
705

706
		q = container_of(walk, struct sem_queue, list);
707
		walk = walk->next;
Linus Torvalds's avatar
Linus Torvalds committed
708

709
710
711
712
713
714
715
716
717
718
719
		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan the "alter" entries: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the  per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
				q->alter)
			break;

Linus Torvalds's avatar
Linus Torvalds committed
720
721
722
723
		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid);

		/* Does q->sleeper still need to sleep? */
724
725
726
		if (error > 0)
			continue;

727
		unlink_queue(sma, q);
728

729
		if (error) {
730
			restart = 0;
731
732
		} else {
			semop_completed = 1;
733
			restart = check_restart(sma, q);
734
		}
735

736
		wake_up_sem_queue_prepare(pt, q, error);
737
		if (restart)
738
			goto again;
Linus Torvalds's avatar
Linus Torvalds committed
739
	}
740
	return semop_completed;
Linus Torvalds's avatar
Linus Torvalds committed
741
742
}

743
744
/**
 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @pt: list head of the tasks that must be woken up.
 *
 * do_smart_update() does the required calls to update_queue, based on the
 * actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_sem_queue_do(@pt).
 * It is safe to perform this call after dropping all locks.
 */
757
758
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			int otime, struct list_head *pt)
759
760
761
762
{
	int i;

	if (sma->complex_count || sops == NULL) {
763
764
		if (update_queue(sma, -1, pt))
			otime = 1;
765
766
767
768
769
770
771
772
	}

	if (!sops) {
		/* No semops; something special is going on. */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (update_queue(sma, i, pt))
				otime = 1;
		}
773
		goto done;
774
775
	}

776
	/* Check the semaphores that were modified. */
777
778
779
780
	for (i = 0; i < nsops; i++) {
		if (sops[i].sem_op > 0 ||
			(sops[i].sem_op < 0 &&
				sma->sem_base[sops[i].sem_num].semval == 0))
781
782
			if (update_queue(sma, sops[i].sem_num, pt))
				otime = 1;
783
	}
784
785
786
done:
	if (otime)
		sma->sem_otime = get_seconds();
787
788
789
}


Linus Torvalds's avatar
Linus Torvalds committed
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
 */
static int count_semncnt (struct sem_array * sma, ushort semnum)
{
	int semncnt;
	struct sem_queue * q;
	int i;

	semncnt = 0;

	/* Operations queued on the array-wide list. */
	list_for_each_entry(q, &sma->sem_pending, list) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;

		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op < 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semncnt++;
	}

	/*
	 * Single-sop operations are queued on the per-semaphore list only
	 * (struct sem_queue has a single list node), so they have to be
	 * counted separately. The range check keeps an out-of-range semnum
	 * from indexing past sem_base.
	 */
	if (semnum < sma->sem_nsems) {
		list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
			struct sembuf * sops = q->sops;
			int nsops = q->nsops;

			for (i = 0; i < nsops; i++)
				if (sops[i].sem_num == semnum
				    && (sops[i].sem_op < 0)
				    && !(sops[i].sem_flg & IPC_NOWAIT))
					semncnt++;
		}
	}
	return semncnt;
}
817

Linus Torvalds's avatar
Linus Torvalds committed
818
819
820
821
822
823
/*
 * Count the pending operations that wait for semaphore @semnum of @sma to
 * become zero (sem_op == 0, IPC_NOWAIT not set).
 */
static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
	int semzcnt;
	struct sem_queue * q;
	int i;

	semzcnt = 0;

	/* Operations queued on the array-wide list. */
	list_for_each_entry(q, &sma->sem_pending, list) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;

		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op == 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semzcnt++;
	}

	/*
	 * Single-sop operations are queued on the per-semaphore list only
	 * (struct sem_queue has a single list node), so they have to be
	 * counted separately. The range check keeps an out-of-range semnum
	 * from indexing past sem_base.
	 */
	if (semnum < sma->sem_nsems) {
		list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
			struct sembuf * sops = q->sops;
			int nsops = q->nsops;

			for (i = 0; i < nsops; i++)
				if (sops[i].sem_num == semnum
				    && (sops[i].sem_op == 0)
				    && !(sops[i].sem_flg & IPC_NOWAIT))
					semzcnt++;
		}
	}
	return semzcnt;
}

Nadia Derbey's avatar
Nadia Derbey committed
837
838
839
/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rw_mutex
 * remains locked on exit.
 */
841
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
Linus Torvalds's avatar
Linus Torvalds committed
842
{
843
844
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
845
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
846
	struct list_head tasks;
847
	int i;
Linus Torvalds's avatar
Linus Torvalds committed
848

849
	/* Free the existing undo structures for this semaphore set.  */
850
	assert_spin_locked(&sma->sem_perm.lock);
851
852
853
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
Linus Torvalds's avatar
Linus Torvalds committed
854
		un->semid = -1;
855
856
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
857
		kfree_rcu(un, rcu);
858
	}
Linus Torvalds's avatar
Linus Torvalds committed
859
860

	/* Wake up all pending processes and let them fail with EIDRM. */
861
	INIT_LIST_HEAD(&tasks);
862
	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
863
		unlink_queue(sma, q);
864
		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
Linus Torvalds's avatar
Linus Torvalds committed
865
	}
866
867
868
869
870
871
872
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = sma->sem_base + i;
		list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
873

Nadia Derbey's avatar
Nadia Derbey committed
874
875
	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
876
	sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
877

878
	wake_up_sem_queue_do(&tasks);
Kirill Korotaev's avatar
Kirill Korotaev committed
879
	ns->used_sems -= sma->sem_nsems;
Linus Torvalds's avatar
Linus Torvalds committed
880
881
882
883
884
885
886
887
888
889
890
891
892
	security_sem_free(sma);
	ipc_rcu_putref(sma);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct semid_ds out;

893
894
		memset(&out, 0, sizeof(out));

Linus Torvalds's avatar
Linus Torvalds committed
895
896
897
898
899
900
901
902
903
904
905
906
907
		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

908
static int semctl_nolock(struct ipc_namespace *ns, int semid,
909
			 int cmd, int version, void __user *p)
Linus Torvalds's avatar
Linus Torvalds committed
910
{
911
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
912
913
914
915
916
917
918
919
920
921
922
923
924
925
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;
		
		memset(&seminfo,0,sizeof(seminfo));
Kirill Korotaev's avatar
Kirill Korotaev committed
926
927
928
929
		seminfo.semmni = ns->sc_semmni;
		seminfo.semmns = ns->sc_semmns;
		seminfo.semmsl = ns->sc_semmsl;
		seminfo.semopm = ns->sc_semopm;
Linus Torvalds's avatar
Linus Torvalds committed
930
931
932
933
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
Nadia Derbey's avatar
Nadia Derbey committed
934
		down_read(&sem_ids(ns).rw_mutex);
Linus Torvalds's avatar
Linus Torvalds committed
935
		if (cmd == SEM_INFO) {
Kirill Korotaev's avatar
Kirill Korotaev committed
936
937
			seminfo.semusz = sem_ids(ns).in_use;
			seminfo.semaem = ns->used_sems;
Linus Torvalds's avatar
Linus Torvalds committed
938
939
940
941
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
Nadia Derbey's avatar
Nadia Derbey committed
942
		max_id = ipc_get_maxid(&sem_ids(ns));
Nadia Derbey's avatar
Nadia Derbey committed
943
		up_read(&sem_ids(ns).rw_mutex);
944
		if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
Linus Torvalds's avatar
Linus Torvalds committed
945
946
947
			return -EFAULT;
		return (max_id < 0) ? 0: max_id;
	}
948
	case IPC_STAT:
Linus Torvalds's avatar
Linus Torvalds committed
949
950
951
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
952
953
954
		int id = 0;

		memset(&tbuf, 0, sizeof(tbuf));
Linus Torvalds's avatar
Linus Torvalds committed
955

956
		if (cmd == SEM_STAT) {
957
958
959
960
961
962
			rcu_read_lock();
			sma = sem_obtain_object(ns, semid);
			if (IS_ERR(sma)) {
				err = PTR_ERR(sma);
				goto out_unlock;
			}
963
964
			id = sma->sem_perm.id;
		} else {
965
966
967
968
969
970
			rcu_read_lock();
			sma = sem_obtain_object_check(ns, semid);
			if (IS_ERR(sma)) {
				err = PTR_ERR(sma);
				goto out_unlock;
			}
971
		}
Linus Torvalds's avatar
Linus Torvalds committed
972
973

		err = -EACCES;
974
		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
Linus Torvalds's avatar
Linus Torvalds committed
975
976
977
978
979
980
981
982
983
984
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime  = sma->sem_otime;
		tbuf.sem_ctime  = sma->sem_ctime;
		tbuf.sem_nsems  = sma->sem_nsems;
985
		rcu_read_unlock();
986
		if (copy_semid_to_user(p, &tbuf, version))
Linus Torvalds's avatar
Linus Torvalds committed
987
988
989
990
991
992
993
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
out_unlock:
994
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
995
996
997
	return err;
}

998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
		unsigned long arg)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem* curr;
	int err;
	struct list_head tasks;
	int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
	/* big-endian 64bit */
	val = arg >> 32;
#else
	/* 32bit or little-endian 64bit */
	val = arg;
#endif

1015
1016
	if (val > SEMVMX || val < 0)
		return -ERANGE;
1017
1018
1019

	INIT_LIST_HEAD(&tasks);

1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}
1037
1038

	err = security_sem_semctl(sma, SETVAL);
1039
1040
1041
1042
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}
1043

1044
	sem_lock(sma, NULL, -1);
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056

	curr = &sma->sem_base[semnum];

	assert_spin_locked(&sma->sem_perm.lock);
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	curr->sempid = task_tgid_vnr(current);
	sma->sem_ctime = get_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &tasks);
1057
	sem_unlock(sma, -1);
1058
	wake_up_sem_queue_do(&tasks);
1059
	return 0;
1060
1061
}

Kirill Korotaev's avatar
Kirill Korotaev committed
1062
static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1063
		int cmd, void __user *p)
Linus Torvalds's avatar
Linus Torvalds committed
1064
1065
1066
{
	struct sem_array *sma;
	struct sem* curr;
1067
	int err, nsems;
Linus Torvalds's avatar
Linus Torvalds committed
1068
1069
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
1070
	struct list_head tasks;
Linus Torvalds's avatar
Linus Torvalds committed
1071

1072
1073
1074
1075
1076
1077
	INIT_LIST_HEAD(&tasks);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
1078
		return PTR_ERR(sma);
1079
	}
Linus Torvalds's avatar
Linus Torvalds committed
1080
1081
1082
1083

	nsems = sma->sem_nsems;

	err = -EACCES;
1084
	if (ipcperms(ns, &sma->sem_perm,
1085
1086
1087
1088
			cmd == SETALL ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out_wakeup;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1089
1090

	err = security_sem_semctl(sma, cmd);
1091
1092
1093
1094
	if (err) {
		rcu_read_unlock();
		goto out_wakeup;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1095
1096
1097
1098
1099

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
1100
		ushort __user *array = p;
Linus Torvalds's avatar
Linus Torvalds committed
1101
1102
		int i;

1103
		sem_lock(sma, NULL, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1104
		if(nsems > SEMMSL_FAST) {
1105
1106
1107
1108
1109
1110
			if (!ipc_rcu_getref(sma)) {
				sem_unlock(sma, -1);
				err = -EIDRM;
				goto out_free;
			}
			sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1111
1112
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
1113
				sem_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1114
1115
1116
				return -ENOMEM;
			}

1117
			sem_lock_and_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1118
			if (sma->sem_perm.deleted) {
1119
				sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1120
1121
1122
				err = -EIDRM;
				goto out_free;
			}
1123
		}
Linus Torvalds's avatar
Linus Torvalds committed
1124
1125
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
1126
		sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

1137
1138
1139
1140
		if (!ipc_rcu_getref(sma)) {
			rcu_read_unlock();
			return -EIDRM;
		}
1141
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1142
1143
1144
1145

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
1146
				sem_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1147
1148
1149
1150
				return -ENOMEM;
			}
		}

1151
		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
1152
			sem_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1153
1154
1155
1156
1157
1158
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
1159
				sem_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1160
1161
1162
1163
				err = -ERANGE;
				goto out_free;
			}
		}
1164
		sem_lock_and_putref(sma);
Linus Torvalds's avatar
Linus Torvalds committed
1165
		if (sma->sem_perm.deleted) {
1166
			sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1167
1168
1169
1170
1171
1172
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
1173
1174
1175

		assert_spin_locked(&sma->sem_perm.lock);
		list_for_each_entry(un, &sma->list_id, list_id) {
Linus Torvalds's avatar
Linus Torvalds committed
1176
1177
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
1178
		}
Linus Torvalds's avatar
Linus Torvalds committed
1179
1180
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
1181
		do_smart_update(sma, NULL, 0, 0, &tasks);
Linus Torvalds's avatar
Linus Torvalds committed
1182
1183
1184
		err = 0;
		goto out_unlock;
	}
1185
	/* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */
Linus Torvalds's avatar
Linus Torvalds committed
1186
1187
	}
	err = -EINVAL;
1188
1189
1190
1191
	if (semnum < 0 || semnum >= nsems) {
		rcu_read_unlock();
		goto out_wakeup;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1192

1193
	sem_lock(sma, NULL, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	}
1210

Linus Torvalds's avatar
Linus Torvalds committed
1211
out_unlock:
1212
	sem_unlock(sma, -1);
1213
out_wakeup:
1214
	wake_up_sem_queue_do(&tasks);
Linus Torvalds's avatar
Linus Torvalds committed
1215
1216
1217
1218
1219
1220
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}

1221
1222
static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
Linus Torvalds's avatar
Linus Torvalds committed
1223
1224
1225
{
	switch(version) {
	case IPC_64:
1226
		if (copy_from_user(out, buf, sizeof(*out)))
Linus Torvalds's avatar
Linus Torvalds committed
1227
1228
1229
1230
1231
1232
1233
1234
1235
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct semid_ds tbuf_old;

		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

1236
1237
1238
		out->sem_perm.uid	= tbuf_old.sem_perm.uid;
		out->sem_perm.gid	= tbuf_old.sem_perm.gid;
		out->sem_perm.mode	= tbuf_old.sem_perm.mode;
Linus Torvalds's avatar
Linus Torvalds committed
1239
1240
1241
1242
1243
1244
1245
1246

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

1247
1248
1249
1250
1251
/*
 * This function handles some semctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 */
1252
static int semctl_down(struct ipc_namespace *ns, int semid,
1253
		       int cmd, int version, void __user *p)
Linus Torvalds's avatar
Linus Torvalds committed
1254
1255
1256
{
	struct sem_array *sma;
	int err;
1257
	struct semid64_ds semid64;
Linus Torvalds's avatar
Linus Torvalds committed
1258
1259
1260
	struct kern_ipc_perm *ipcp;

	if(cmd == IPC_SET) {
1261
		if (copy_semid_from_user(&semid64, p, version))
Linus Torvalds's avatar
Linus Torvalds committed
1262
1263
			return -EFAULT;
	}
Steve Grubb's avatar
Steve Grubb committed
1264

1265
1266
	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
				      &semid64.sem_perm, 0);
1267
1268
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);
Steve Grubb's avatar
Steve Grubb committed
1269

1270
	sma = container_of(ipcp, struct sem_array, sem_perm);
Linus Torvalds's avatar
Linus Torvalds committed
1271
1272

	err = security_sem_semctl(sma, cmd);
1273
1274
	if (err) {
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1275
		goto out_unlock;
1276
	}
Linus Torvalds's avatar
Linus Torvalds committed
1277
1278
1279

	switch(cmd){
	case IPC_RMID:
1280
		sem_lock(sma, NULL, -1);
1281
		freeary(ns, ipcp);
1282
		goto out_up;
Linus Torvalds's avatar
Linus Torvalds committed
1283
	case IPC_SET:
1284
		sem_lock(sma, NULL, -1);
1285
1286
1287
		err = ipc_update_perm(&semid64.sem_perm, ipcp);
		if (err)
			goto out_unlock;
Linus Torvalds's avatar
Linus Torvalds committed
1288
1289
1290
		sma->sem_ctime = get_seconds();
		break;
	default:
1291
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1292
		err = -EINVAL;
1293
		goto out_up;
Linus Torvalds's avatar
Linus Torvalds committed
1294
1295
1296
	}

out_unlock:
1297
	sem_unlock(sma, -1);
1298
1299
out_up:
	up_write(&sem_ids(ns).rw_mutex);
Linus Torvalds's avatar
Linus Torvalds committed
1300
1301
1302
	return err;
}

1303
SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
Linus Torvalds's avatar
Linus Torvalds committed
1304
1305
{
	int version;
Kirill Korotaev's avatar
Kirill Korotaev committed
1306
	struct ipc_namespace *ns;
1307
	void __user *p = (void __user *)arg;
Linus Torvalds's avatar
Linus Torvalds committed
1308
1309
1310
1311
1312

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
Kirill Korotaev's avatar
Kirill Korotaev committed
1313
	ns = current->nsproxy->ipc_ns;
Linus Torvalds's avatar
Linus Torvalds committed
1314
1315
1316
1317

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
1318
	case IPC_STAT:
Linus Torvalds's avatar
Linus Torvalds committed
1319
	case SEM_STAT:
1320
		return semctl_nolock(ns, semid, cmd, version, p);
Linus Torvalds's avatar
Linus Torvalds committed
1321
1322
1323
1324
1325
1326
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
1327
1328
1329
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
Linus Torvalds's avatar
Linus Torvalds committed
1330
1331
	case IPC_RMID:
	case IPC_SET:
1332
		return semctl_down(ns, semid, cmd, version, p);
Linus Torvalds's avatar
Linus Torvalds committed
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347