			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}
}

/*
 * Check for any process-wide CPU timers that have fired and move them
 * off the tsk->signal->cpu_timers lists onto the firing list.  The
 * per-thread timers have already been taken off by check_thread_timers().
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	int maxfire;
	struct signal_struct *const sig = tsk->signal;
	cputime_t utime, stime, ptime, virt_expires, prof_expires;
	unsigned long long sched_time, sched_expires;
	struct task_struct *t;
	struct list_head *timers = sig->cpu_timers;

	/*
	 * Don't sample the current process CPU clocks if there are no timers.
	 */
	if (list_empty(&timers[CPUCLOCK_PROF]) &&
	    cputime_eq(sig->it_prof_expires, cputime_zero) &&
	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
	    list_empty(&timers[CPUCLOCK_VIRT]) &&
	    cputime_eq(sig->it_virt_expires, cputime_zero) &&
	    list_empty(&timers[CPUCLOCK_SCHED]))
		return;

	/*
	 * Collect the current process totals.
	 */
	utime = sig->utime;
	stime = sig->stime;
	sched_time = sig->sched_time;
	t = tsk;
	do {
		utime = cputime_add(utime, t->utime);
		stime = cputime_add(stime, t->stime);
		sched_time += t->sched_time;
		t = next_thread(t);
	} while (t != tsk);
	ptime = cputime_add(utime, stime);

	maxfire = 20;	/* bound how many timers can fire in one pass */
	prof_expires = cputime_zero;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_entry(timers->next,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) {
			prof_expires = t->expires.cpu;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	++timers;
	maxfire = 20;
	virt_expires = cputime_zero;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_entry(timers->next,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || cputime_lt(utime, t->expires.cpu)) {
			virt_expires = t->expires.cpu;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	++timers;
	maxfire = 20;
	sched_expires = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_entry(timers->next,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || sched_time < t->expires.sched) {
			sched_expires = t->expires.sched;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	/*
	 * Check for the special case process timers.
	 */
	if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
		if (cputime_ge(ptime, sig->it_prof_expires)) {
			/* ITIMER_PROF fires and reloads.  */
			sig->it_prof_expires = sig->it_prof_incr;
			if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
				sig->it_prof_expires = cputime_add(
					sig->it_prof_expires, ptime);
			}
			__group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
		}
		if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
		    (cputime_eq(prof_expires, cputime_zero) ||
		     cputime_lt(sig->it_prof_expires, prof_expires))) {
			prof_expires = sig->it_prof_expires;
		}
	}
	if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
		if (cputime_ge(utime, sig->it_virt_expires)) {
			/* ITIMER_VIRTUAL fires and reloads.  */
			sig->it_virt_expires = sig->it_virt_incr;
			if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
				sig->it_virt_expires = cputime_add(
					sig->it_virt_expires, utime);
			}
			__group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
		}
		if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
		    (cputime_eq(virt_expires, cputime_zero) ||
		     cputime_lt(sig->it_virt_expires, virt_expires))) {
			virt_expires = sig->it_virt_expires;
		}
	}
	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
		unsigned long psecs = cputime_to_secs(ptime);
		cputime_t x;
		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
			/*
			 * At the hard limit, we just die.
			 * No need to calculate anything else now.
			 */
			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
			return;
		}
		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
			/*
			 * At the soft limit, send a SIGXCPU every second.
			 */
			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
			if (sig->rlim[RLIMIT_CPU].rlim_cur
			    < sig->rlim[RLIMIT_CPU].rlim_max) {
				sig->rlim[RLIMIT_CPU].rlim_cur++;
			}
		}
		x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
		if (cputime_eq(prof_expires, cputime_zero) ||
		    cputime_lt(x, prof_expires)) {
			prof_expires = x;
		}
	}

	if (!cputime_eq(prof_expires, cputime_zero) ||
	    !cputime_eq(virt_expires, cputime_zero) ||
	    sched_expires != 0) {
		/*
		 * Rebalance the threads' expiry times for the remaining
		 * process CPU timers.
		 */

		cputime_t prof_left, virt_left, ticks;
		unsigned long long sched_left, sched;
		const unsigned int nthreads = atomic_read(&sig->live);

		prof_left = cputime_sub(prof_expires, utime);
		prof_left = cputime_sub(prof_left, stime);
		prof_left = cputime_div(prof_left, nthreads);
		virt_left = cputime_sub(virt_expires, utime);
		virt_left = cputime_div(virt_left, nthreads);
		if (sched_expires) {
			sched_left = sched_expires - sched_time;
			do_div(sched_left, nthreads);
		} else {
			sched_left = 0;
		}
		t = tsk;
		do {
			ticks = cputime_add(cputime_add(t->utime, t->stime),
					    prof_left);
			if (!cputime_eq(prof_expires, cputime_zero) &&
			    (cputime_eq(t->it_prof_expires, cputime_zero) ||
			     cputime_gt(t->it_prof_expires, ticks))) {
				t->it_prof_expires = ticks;
			}

			ticks = cputime_add(t->utime, virt_left);
			if (!cputime_eq(virt_expires, cputime_zero) &&
			    (cputime_eq(t->it_virt_expires, cputime_zero) ||
			     cputime_gt(t->it_virt_expires, ticks))) {
				t->it_virt_expires = ticks;
			}

			sched = t->sched_time + sched_left;
			if (sched_expires && (t->it_sched_expires == 0 ||
					      t->it_sched_expires > sched)) {
				t->it_sched_expires = sched;
			}

			do {
				t = next_thread(t);
			} while (unlikely(t->exit_state));
		} while (t != tsk);
	}
}
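
/*
 * Worked example (editorial illustration, not from the original source):
 * suppose nthreads == 4, the group has consumed ptime = 8s, and the
 * nearest process-wide profiling expiry is prof_expires = 12s.  The
 * rebalance above computes prof_left = (12s - 8s) / 4 = 1s and arms each
 * thread's it_prof_expires at its own utime + stime + 1s.  Splitting the
 * remaining 4s evenly guarantees that some thread re-enters
 * run_posix_cpu_timers() no later than the point where the group total
 * could reach 12s, even if all four threads run in parallel.
 */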

/*
 * This is called from the signal code (via do_schedule_next_timer)
 * when the last timer signal was delivered and we have to reload the timer.
 */
void posix_cpu_timer_schedule(struct k_itimer *timer)
{
	struct task_struct *p = timer->it.cpu.task;
	union cpu_time_count now;

	if (unlikely(p == NULL))
		/*
		 * The task was cleaned up already, no future firings.
		 */
		return;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		cpu_clock_sample(timer->it_clock, p, &now);
		bump_cpu_timer(timer, now);
		if (unlikely(p->exit_state)) {
			clear_dead_task(timer, now);
			return;
		}
		read_lock(&tasklist_lock); /* arm_timer needs it.  */
	} else {
		read_lock(&tasklist_lock);
		if (unlikely(p->signal == NULL)) {
			/*
			 * The process has been reaped.
			 * We can't even collect a sample any more.
			 */
			put_task_struct(p);
			timer->it.cpu.task = p = NULL;
			timer->it.cpu.expires.sched = 0;
			read_unlock(&tasklist_lock);
			return;
		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
			/*
			 * We've noticed that the thread is dead, but
			 * not yet reaped.  Take this opportunity to
			 * drop our task ref.
			 */
			clear_dead_task(timer, now);
			read_unlock(&tasklist_lock);
			return;
		}
		cpu_clock_sample_group(timer->it_clock, p, &now);
		bump_cpu_timer(timer, now);
		/* Leave the tasklist_lock locked for the call below.  */
	}

	/*
	 * Now re-arm for the new expiry time.
	 */
	arm_timer(timer, now);

	read_unlock(&tasklist_lock);
}

/*
 * This is called from the timer interrupt handler.  The irq handler has
 * already updated our counts.  We need to check if any timers fire now.
 * Interrupts are disabled.
 */
void run_posix_cpu_timers(struct task_struct *tsk)
{
	LIST_HEAD(firing);
	struct k_itimer *timer, *next;

	BUG_ON(!irqs_disabled());
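
	/*
	 * Quick check before taking any locks: a clock counts as
	 * "unexpired" when its per-thread expiry is unset (zero) or
	 * still ahead of the CPU time this thread has accumulated.
	 */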

#define UNEXPIRED(clock) \
		(cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
		 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))

	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
	    (tsk->it_sched_expires == 0 ||
	     tsk->sched_time < tsk->it_sched_expires))
		return;

#undef	UNEXPIRED

	/*
	 * Double-check with locks held.
	 */
	read_lock(&tasklist_lock);
	if (likely(tsk->signal != NULL)) {
		spin_lock(&tsk->sighand->siglock);

		/*
		 * Here we take any timers that are firing off the
		 * tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] lists
		 * and put them on the firing list.
		 */
		check_thread_timers(tsk, &firing);
		check_process_timers(tsk, &firing);

		/*
		 * We must release these locks before taking any timer's lock.
		 * There is a potential race with timer deletion here, as the
		 * siglock now protects our private firing list.  We have set
		 * the firing flag in each timer, so that a deletion attempt
		 * that gets the timer lock before we do will give it up and
		 * spin until we've taken care of that timer below.
		 */
		spin_unlock(&tsk->sighand->siglock);
	}
	read_unlock(&tasklist_lock);

	/*
	 * Now that all the timers on our list have the firing flag,
	 * no one will touch their list entries but us.  We'll take
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
		int firing;
		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.entry);
		firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;
		/*
		 * The firing flag is -1 if we collided with a reset
		 * of the timer, which already reported this
		 * almost-firing as an overrun.  So don't generate an event.
		 */
		if (likely(firing >= 0)) {
			cpu_timer_fire(timer);
		}
		spin_unlock(&timer->it_lock);
	}
}
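
/*
 * Editorial note on the race above: the timer-deletion path
 * (posix_cpu_timer_del(), not shown in this excerpt) checks
 * it.cpu.firing under the timer's lock and, when it is set, hands
 * TIMER_RETRY back to the generic posix-timers code and retries, so a
 * concurrent delete cannot free a timer out from under the loop above.
 */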

/*
 * Set one of the process-wide special case CPU timers.
 * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
 * absolute; non-null for ITIMER_*, where *newval is relative and we update
 * it to be absolute, *oldval is absolute and we update it to be relative.
 */
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
			   cputime_t *newval, cputime_t *oldval)
{
	union cpu_time_count now;
	struct list_head *head;

	BUG_ON(clock_idx == CPUCLOCK_SCHED);
	cpu_clock_sample_group_locked(clock_idx, tsk, &now);

	if (oldval) {
		if (!cputime_eq(*oldval, cputime_zero)) {
			if (cputime_le(*oldval, now.cpu)) {
				/* Just about to fire. */
				*oldval = jiffies_to_cputime(1);
			} else {
				*oldval = cputime_sub(*oldval, now.cpu);
			}
		}

		if (cputime_eq(*newval, cputime_zero))
			return;
		*newval = cputime_add(*newval, now.cpu);

		/*
		 * If the RLIMIT_CPU timer will expire before the
		 * ITIMER_PROF timer, we have nothing else to do.
		 */
		if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
		    < cputime_to_secs(*newval))
			return;
	}

	/*
	 * Check whether there are any process timers already set to fire
	 * before this one.  If so, we don't have anything more to do.
	 */
	head = &tsk->signal->cpu_timers[clock_idx];
	if (list_empty(head) ||
	    cputime_ge(list_entry(head->next,
				  struct cpu_timer_list, entry)->expires.cpu,
		       *newval)) {
		/*
		 * Rejigger each thread's expiry time so that one will
		 * notice before we hit the process-cumulative expiry time.
		 */
		union cpu_time_count expires = { .sched = 0 };
		expires.cpu = *newval;
		process_timer_rebalance(tsk, clock_idx, expires, now);
	}
}
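
/*
 * Usage sketch (editorial illustration; do_setitimer() lives outside
 * this file): an ITIMER_PROF arming from userspace, such as
 *
 *	struct itimerval it = {
 *		.it_value    = { .tv_sec = 2 },
 *		.it_interval = { .tv_sec = 1 },
 *	};
 *	setitimer(ITIMER_PROF, &it, NULL);
 *
 * reaches this function with the relative 2s in *newval.  The current
 * group sample is added to make it absolute, and if no queued process
 * timer fires sooner, process_timer_rebalance() spreads the new expiry
 * across the live threads.  SIGPROF then repeats every further 1s of
 * group CPU time via the it_prof_incr reload in check_process_timers().
 */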

static long posix_cpu_clock_nanosleep_restart(struct restart_block *);

int posix_cpu_nsleep(clockid_t which_clock, int flags,
		     struct timespec *rqtp)
{
	struct restart_block *restart_block =
	    &current_thread_info()->restart_block;
	struct k_itimer timer;
	int error;

	/*
	 * Diagnose required errors first.
	 */
	if (CPUCLOCK_PERTHREAD(which_clock) &&
	    (CPUCLOCK_PID(which_clock) == 0 ||
	     CPUCLOCK_PID(which_clock) == current->pid))
		return -EINVAL;

	/*
	 * Set up a temporary timer and then wait for it to go off.
	 */
	memset(&timer, 0, sizeof timer);
	spin_lock_init(&timer.it_lock);
	timer.it_clock = which_clock;
	timer.it_overrun = -1;
	error = posix_cpu_timer_create(&timer);
	timer.it_process = current;
	if (!error) {
		struct timespec __user *rmtp;
		static struct itimerspec zero_it;
		struct itimerspec it = { .it_value = *rqtp,
					 .it_interval = {} };

		spin_lock_irq(&timer.it_lock);
		error = posix_cpu_timer_set(&timer, flags, &it, NULL);
		if (error) {
			spin_unlock_irq(&timer.it_lock);
			return error;
		}

		while (!signal_pending(current)) {
			if (timer.it.cpu.expires.sched == 0) {
				/*
				 * Our timer fired and was reset.
				 */
				spin_unlock_irq(&timer.it_lock);
				return 0;
			}

			/*
			 * Block until cpu_timer_fire (or a signal) wakes us.
			 */
			__set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&timer.it_lock);
			schedule();
			spin_lock_irq(&timer.it_lock);
		}

		/*
		 * We were interrupted by a signal.
		 */
		sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
		posix_cpu_timer_set(&timer, 0, &zero_it, &it);
		spin_unlock_irq(&timer.it_lock);

		if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
			/*
			 * It actually did fire already.
			 */
			return 0;
		}

		/*
		 * Report back to the user the time still remaining.
		 */
		rmtp = (struct timespec __user *) restart_block->arg1;
		if (rmtp != NULL && !(flags & TIMER_ABSTIME) &&
		    copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
			return -EFAULT;

		restart_block->fn = posix_cpu_clock_nanosleep_restart;
		/* Caller already set restart_block->arg1 */
		restart_block->arg0 = which_clock;
		restart_block->arg2 = rqtp->tv_sec;
		restart_block->arg3 = rqtp->tv_nsec;

		error = -ERESTART_RESTARTBLOCK;
	}

	return error;
}

static long
posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block)
{
	clockid_t which_clock = restart_block->arg0;
	struct timespec t = { .tv_sec = restart_block->arg2,
			      .tv_nsec = restart_block->arg3 };
	restart_block->fn = do_no_restart_syscall;
	return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t);
}
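
/*
 * Usage sketch (editorial illustration, userspace side): sleeping on the
 * process CPU clock blocks until the whole thread group has consumed the
 * requested CPU time, not until wall-clock time passes:
 *
 *	struct timespec ts = { .tv_sec = 1 };
 *	clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &ts, NULL);
 *
 * When a signal interrupts the sleep, posix_cpu_nsleep() rewrites *rqtp
 * to the absolute expiry already computed for the temporary timer, so
 * the restart helper above can resume with TIMER_ABSTIME and the
 * restarted sleep does not drift.
 */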


#define PROCESS_CLOCK	MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK	MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)

static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
{
	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
}
static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
{
	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
}
static int process_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = PROCESS_CLOCK;
	return posix_cpu_timer_create(timer);
}
static int process_cpu_nsleep(clockid_t which_clock, int flags,
			      struct timespec *rqtp)
{
	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
}
static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
{
	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
}
static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
{
	return posix_cpu_clock_get(THREAD_CLOCK, tp);
}
static int thread_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = THREAD_CLOCK;
	return posix_cpu_timer_create(timer);
}
static int thread_cpu_nsleep(clockid_t which_clock, int flags,
			      struct timespec *rqtp)
{
	return -EINVAL;
}

static __init int init_posix_cpu_timers(void)
{
	struct k_clock process = {
		.clock_getres = process_cpu_clock_getres,
		.clock_get = process_cpu_clock_get,
		.clock_set = do_posix_clock_nosettime,
		.timer_create = process_cpu_timer_create,
		.nsleep = process_cpu_nsleep,
	};
	struct k_clock thread = {
		.clock_getres = thread_cpu_clock_getres,
		.clock_get = thread_cpu_clock_get,
		.clock_set = do_posix_clock_nosettime,
		.timer_create = thread_cpu_timer_create,
		.nsleep = thread_cpu_nsleep,
	};

	register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
	register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);

	return 0;
}
__initcall(init_posix_cpu_timers);
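
/*
 * End-to-end usage sketch (editorial illustration, userspace side): the
 * two k_clock structures registered above back timer_create() on the
 * standard clock IDs, e.g.:
 *
 *	timer_t tid;
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGPROF,
 *	};
 *	struct itimerspec its = {
 *		.it_value    = { .tv_sec = 5 },
 *		.it_interval = { .tv_sec = 5 },
 *	};
 *	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
 *	timer_settime(tid, 0, &its, NULL);
 *
 * Each expiry is delivered through cpu_timer_fire(); for the periodic
 * case, posix_cpu_timer_schedule() above re-arms the timer once the
 * signal has been delivered.
 */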