Commit 164d44fd authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  clocksource: Add clocksource_register_hz/khz interface
  posix-cpu-timers: Optimize run_posix_cpu_timers()
  time: Remove xtime_cache
  mqueue: Convert message queue timeout to use hrtimers
  hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME
  timers: Introduce the concept of timer slack for legacy timers
  ntp: Remove tickadj
  ntp: Make time_adjust static
  time: Add xtime, wall_to_monotonic to feature-removal-schedule
  timer: Try to survive timer callback preempt_count leak
  timer: Split out timer function call
  timer: Print function name for timer callbacks modifying preemption count
  time: Clean up warp_clock()
  cpu-timers: Avoid iterating over all threads in fastpath_timer_check()
  cpu-timers: Change SIGEV_NONE timer implementation
  cpu-timers: Return correct previous timer reload value
  cpu-timers: Cleanup arm_timer()
  cpu-timers: Simplify RLIMIT_CPU handling
parents 5bfec46b d7e81c26
......@@ -541,6 +541,16 @@ Who: Avi Kivity <avi@redhat.com>
----------------------------
What: xtime, wall_to_monotonic
When: 2.6.36+
Files: kernel/time/timekeeping.c include/linux/time.h
Why: Cleaning up timekeeping internal values. Please use
existing timekeeping accessor functions to access
the equivalent functionality.
Who: John Stultz <johnstul@us.ibm.com>
----------------------------
What: KVM kernel-allocated memory slots
When: July 2010
Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
......
......@@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
}
/* used to install a new clocksource */
extern int clocksource_register(struct clocksource*);
extern void clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
......@@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs);
extern void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
/*
* Don't call __clocksource_register_scale directly, use
* clocksource_register_hz/khz
*/
extern int
__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
return __clocksource_register_scale(cs, 1, hz);
}
static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
{
return __clocksource_register_scale(cs, 1000, khz);
}
static inline void
clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
{
......
......@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode);
extern int schedule_hrtimeout_range_clock(ktime_t *expires,
unsigned long delta, const enum hrtimer_mode mode, int clock);
extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */
......
......@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
extern void update_wall_time(void);
extern void update_xtime_cache(u64 nsec);
extern void timekeeping_leap_insert(int leapsecond);
struct tms;
......
......@@ -10,13 +10,19 @@
struct tvec_base;
struct timer_list {
/*
* All fields that change during normal runtime grouped to the
* same cacheline
*/
struct list_head entry;
unsigned long expires;
struct tvec_base *base;
void (*function)(unsigned long);
unsigned long data;
struct tvec_base *base;
int slack;
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
......@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
extern void set_timer_slack(struct timer_list *time, int slack_hz);
#define TIMER_NOT_PINNED 0
#define TIMER_PINNED 1
/*
......
......@@ -232,13 +232,11 @@ struct timex {
*/
extern unsigned long tick_usec; /* USER_HZ period (usec) */
extern unsigned long tick_nsec; /* ACTHZ period (nsec) */
extern int tickadj; /* amount of adjustment per tick */
/*
* phase-lock loop variables
*/
extern int time_status; /* clock synchronization status bits */
extern long time_adjust; /* The amount of adjtime left */
extern void ntp_init(void);
extern void ntp_clear(void);
......@@ -271,9 +269,6 @@ extern void second_overflow(void);
extern void update_ntp_one_tick(void);
extern int do_adjtimex(struct timex *);
/* Don't use! Compatibility define for existing users. */
#define tickadj (500/HZ ? : 1)
int read_current_timer(unsigned long *timer_val);
/* The clock frequency of the i8253/i8254 PIT */
......
......@@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
* sr: SEND or RECV
*/
static int wq_sleep(struct mqueue_inode_info *info, int sr,
long timeout, struct ext_wait_queue *ewp)
ktime_t *timeout, struct ext_wait_queue *ewp)
{
int retval;
signed long time;
......@@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&info->lock);
time = schedule_timeout(timeout);
time = schedule_hrtimeout_range_clock(timeout,
HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
while (ewp->state == STATE_PENDING)
cpu_relax();
......@@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info)
wake_up(&info->wait_q);
}
static long prepare_timeout(struct timespec *p)
static int prepare_timeout(const struct timespec __user *u_abs_timeout,
ktime_t *expires, struct timespec *ts)
{
struct timespec nowts;
long timeout;
if (p) {
if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
|| p->tv_nsec >= NSEC_PER_SEC))
return -EINVAL;
nowts = CURRENT_TIME;
/* first subtract as jiffies can't be too big */
p->tv_sec -= nowts.tv_sec;
if (p->tv_nsec < nowts.tv_nsec) {
p->tv_nsec += NSEC_PER_SEC;
p->tv_sec--;
}
p->tv_nsec -= nowts.tv_nsec;
if (p->tv_sec < 0)
return 0;
timeout = timespec_to_jiffies(p) + 1;
} else
return MAX_SCHEDULE_TIMEOUT;
if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
return -EFAULT;
if (!timespec_valid(ts))
return -EINVAL;
return timeout;
*expires = timespec_to_ktime(*ts);
return 0;
}
static void remove_notification(struct mqueue_inode_info *info)
......@@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
struct ext_wait_queue *receiver;
struct msg_msg *msg_ptr;
struct mqueue_inode_info *info;
struct timespec ts, *p = NULL;
long timeout;
ktime_t expires, *timeout = NULL;
struct timespec ts;
int ret;
if (u_abs_timeout) {
if (copy_from_user(&ts, u_abs_timeout,
sizeof(struct timespec)))
return -EFAULT;
p = &ts;
int res = prepare_timeout(u_abs_timeout, &expires, &ts);
if (res)
return res;
timeout = &expires;
}
if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
return -EINVAL;
audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
timeout = prepare_timeout(p);
audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
filp = fget(mqdes);
if (unlikely(!filp)) {
......@@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
if (filp->f_flags & O_NONBLOCK) {
spin_unlock(&info->lock);
ret = -EAGAIN;
} else if (unlikely(timeout < 0)) {
spin_unlock(&info->lock);
ret = timeout;
} else {
wait.task = current;
wait.msg = (void *) msg_ptr;
......@@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
size_t, msg_len, unsigned int __user *, u_msg_prio,
const struct timespec __user *, u_abs_timeout)
{
long timeout;
ssize_t ret;
struct msg_msg *msg_ptr;
struct file *filp;
struct inode *inode;
struct mqueue_inode_info *info;
struct ext_wait_queue wait;
struct timespec ts, *p = NULL;
ktime_t expires, *timeout = NULL;
struct timespec ts;
if (u_abs_timeout) {
if (copy_from_user(&ts, u_abs_timeout,
sizeof(struct timespec)))
return -EFAULT;
p = &ts;
int res = prepare_timeout(u_abs_timeout, &expires, &ts);
if (res)
return res;
timeout = &expires;
}
audit_mq_sendrecv(mqdes, msg_len, 0, p);
timeout = prepare_timeout(p);
audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
filp = fget(mqdes);
if (unlikely(!filp)) {
......@@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
if (filp->f_flags & O_NONBLOCK) {
spin_unlock(&info->lock);
ret = -EAGAIN;
msg_ptr = NULL;
} else if (unlikely(timeout < 0)) {
spin_unlock(&info->lock);
ret = timeout;
msg_ptr = NULL;
} else {
wait.task = current;
wait.state = STATE_NONE;
......
......@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
}
/**
* schedule_hrtimeout_range - sleep until timeout
* schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
*
* Make the current task sleep until the given expiry time has
* elapsed. The routine will return immediately unless
* the current task state has been set (see set_current_state()).
*
* The @delta argument gives the kernel the freedom to schedule the
* actual wakeup to a time that is both power and performance friendly.
* The kernel give the normal best effort behavior for "@expires+@delta",
* but may decide to fire the timer earlier, but no earlier than @expires.
*
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
* pass before the routine returns.
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
* delivered to the current task.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
*
* Returns 0 when the timer has expired otherwise -EINTR
* @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
*/
int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode)
int __sched
schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode, int clock)
{
struct hrtimer_sleeper t;
......@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return -EINTR;
}
hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
hrtimer_init_on_stack(&t.timer, clock, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_init_sleeper(&t, current);
......@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return !t.task ? 0 : -EINTR;
}
/**
* schedule_hrtimeout_range - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
*
* Make the current task sleep until the given expiry time has
* elapsed. The routine will return immediately unless
* the current task state has been set (see set_current_state()).
*
* The @delta argument gives the kernel the freedom to schedule the
* actual wakeup to a time that is both power and performance friendly.
* The kernel give the normal best effort behavior for "@expires+@delta",
* but may decide to fire the timer earlier, but no earlier than @expires.
*
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
* pass before the routine returns.
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
* delivered to the current task.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
*
* Returns 0 when the timer has expired otherwise -EINTR
*/
int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode)
{
return schedule_hrtimeout_range_clock(expires, delta, mode,
CLOCK_MONOTONIC);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
/**
......
<
......@@ -11,19 +11,18 @@
#include <trace/events/timer.h>
/*
* Called after updating RLIMIT_CPU to set timer expiration if necessary.
* Called after updating RLIMIT_CPU to run cpu timer and update
* tsk->signal->cputime_expires expiration cache if necessary. Needs
* siglock protection since other code may update expiration cache as
* well.
*/
void update_rlimit_cpu(unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
struct signal_struct *const sig = current->signal;
if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
spin_lock_irq(&current->sighand->siglock);
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
}
spin_lock_irq(&current->sighand->siglock);
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)
......@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
cputime_gt(expires, new_exp);
}
static inline int expires_le(cputime_t expires, cputime_t new_exp)
{
return !cputime_eq(expires, cputime_zero) &&
cputime_le(expires, new_exp);
}
/*
* Insert the timer on the appropriate list before any timers that
* expire later. This must be called with the tasklist_lock held
* for reading, and interrupts disabled.
* for reading, interrupts disabled and p->sighand->siglock taken.
*/
static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
unsigned long i;
head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
p->cpu_timers : p->signal->cpu_timers);
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
head = p->cpu_timers;
cputime_expires = &p->cputime_expires;
} else {
head = p->signal->cpu_timers;
cputime_expires = &p->signal->cputime_expires;
}
head += CPUCLOCK_WHICH(timer->it_clock);
BUG_ON(!irqs_disabled());
spin_lock(&p->sighand->siglock);
listpos = head;
if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
list_for_each_entry(next, head, entry) {
if (next->expires.sched > nt->expires.sched)
break;
listpos = &next->entry;
}
} else {
list_for_each_entry(next, head, entry) {
if (cputime_gt(next->expires.cpu, nt->expires.cpu))
break;
listpos = &next->entry;
}
list_for_each_entry(next, head, entry) {
if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
break;
listpos = &next->entry;
}
list_add(&nt->entry, listpos);
if (listpos == head) {
union cpu_time_count *exp = &nt->expires;
/*
* We are the new earliest-expiring timer.
* If we are a thread timer, there can always
* be a process timer telling us to stop earlier.
* We are the new earliest-expiring POSIX 1.b timer, hence
* need to update expiration cache. Take into account that
* for process timers we share expiration cache with itimers
* and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
union cpu_time_count *exp = &nt->expires;
switch (CPUCLOCK_WHICH(timer->it_clock)) {
default:
BUG();
case CPUCLOCK_PROF:
if (expires_gt(p->cputime_expires.prof_exp,
exp->cpu))
p->cputime_expires.prof_exp = exp->cpu;
break;
case CPUCLOCK_VIRT:
if (expires_gt(p->cputime_expires.virt_exp,
exp->cpu))
p->cputime_expires.virt_exp = exp->cpu;
break;
case CPUCLOCK_SCHED:
if (p->cputime_expires.sched_exp == 0 ||
p->cputime_expires.sched_exp > exp->sched)
p->cputime_expires.sched_exp =
exp->sched;
break;
}
} else {
struct signal_struct *const sig = p->signal;
union cpu_time_count *exp = &timer->it.cpu.expires;
/*
* For a process timer, set the cached expiration time.
*/
switch (CPUCLOCK_WHICH(timer->it_clock)) {
default:
BUG();
case CPUCLOCK_VIRT:
if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
exp->cpu))
break;
sig->cputime_expires.virt_exp = exp->cpu;
break;
case CPUCLOCK_PROF:
if (expires_le(sig->it[CPUCLOCK_PROF].expires,
exp->cpu))
break;
i = sig->rlim[RLIMIT_CPU].rlim_cur;
if (i != RLIM_INFINITY &&
i <= cputime_to_secs(exp->cpu))
break;
sig->cputime_expires.prof_exp = exp->cpu;
break;
case CPUCLOCK_SCHED:
sig->cputime_expires.sched_exp = exp->sched;
break;
}
switch (CPUCLOCK_WHICH(timer->it_clock)) {
case CPUCLOCK_PROF:
if (expires_gt(cputime_expires->prof_exp, exp->cpu))
cputime_expires->prof_exp = exp->cpu;
break;
case CPUCLOCK_VIRT:
if (expires_gt(cputime_expires->virt_exp, exp->cpu))
cputime_expires->virt_exp = exp->cpu;
break;
case CPUCLOCK_SCHED:
if (cputime_expires->sched_exp == 0 ||
cputime_expires->sched_exp > exp->sched)
cputime_expires->sched_exp = exp->sched;
break;
}
}
spin_unlock(&p->sighand->siglock);
}
/*
......@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
if (unlikely(timer->sigq == NULL)) {
if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
/*
* User don't want any signal.
*/
timer->it.cpu.expires.sched = 0;
} else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
......@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
union cpu_time_count old_expires, new_expires, val;
union cpu_time_count old_expires, new_expires, old_incr, val;
int ret;
if (unlikely(p == NULL)) {
......@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
BUG_ON(!irqs_disabled());
ret = 0;
old_incr = timer->it.cpu.incr;
spin_lock(&p->sighand->siglock);
old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) {
......@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
ret = TIMER_RETRY;
} else
list_del_init(&timer->it.cpu.entry);
spin_unlock(&p->sighand->siglock);
/*
* We need to sample the current value to convert the new
......@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* disable this firing since we are already reporting
* it as an overrun (thanks to bump_cpu_timer above).
*/
spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
goto out;
}
......@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
*/
timer->it.cpu.expires = new_expires;
if (new_expires.sched != 0 &&
(timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
cpu_time_before(timer->it_clock, val, new_expires)) {
arm_timer(timer, val);
arm_timer(timer);
}
spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
/*
......@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun = -1;
if (new_expires.sched != 0 &&
(timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
!cpu_time_before(timer->it_clock, val, new_expires)) {
/*
* The designated time already passed, so we notify
......@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
out:
if (old) {
sample_to_timespec(timer->it_clock,
timer->it.cpu.incr, &old->it_interval);
old_incr, &old->it_interval);
}
return ret;
}
......@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock);
}
if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
if (timer->it.cpu.incr.sched == 0 &&
cpu_time_before(timer->it_clock,
timer->it.cpu.expires, now)) {
/*
* Do-nothing timer expired and has no reload,
* so it's as if it was never set.
*/
timer->it.cpu.expires.sched = 0;
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
return;
}
/*
* Account for any expirations and reloads that should