Commit 79bf2bb3 authored by Thomas Gleixner, committed by Linus Torvalds
Browse files

[PATCH] tick-management: dyntick / highres functionality



With Ingo Molnar <mingo@elte.hu>

Add functions to provide dynamic ticks and high resolution timers.  The code
which keeps track of jiffies and handles the long idle periods is shared
between tick based and high resolution timer based dynticks.  The dyntick
functionality can be disabled on the kernel commandline.  Provide also the
infrastructure to support high resolution timers.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f8381cba
......@@ -1078,6 +1078,10 @@ and is between 256 and 4096 characters. It is defined in the file
in certain environments such as networked servers or
real-time systems.
nohz= [KNL] Boottime enable/disable dynamic ticks
Valid arguments: on, off
Default: on
noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing
noirqdebug [IA-32] Disables the code which attempts to detect and
......
......@@ -106,6 +106,16 @@ static inline void account_system_vtime(struct task_struct *tsk)
* always balanced, so the interrupted value of ->hardirq_context
* will always be restored.
*/
/*
 * __irq_enter - low-level hardirq entry bookkeeping.
 *
 * Calls account_system_vtime() for current, adds HARDIRQ_OFFSET to the
 * preempt count and invokes the hardirq-enter trace hook, in that order.
 * It does not touch jiffies; irq_enter() wraps this macro and adds the
 * NO_HZ jiffies update on top.
 */
#define __irq_enter() \
do { \
account_system_vtime(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
/*
* Enter irq context (on NO_HZ, update jiffies):
*/
extern void irq_enter(void);
/*
......@@ -123,7 +133,7 @@ extern void irq_enter(void);
*/
extern void irq_exit(void);
#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0)
#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
#endif /* LINUX_HARDIRQ_H */
......@@ -201,4 +201,10 @@ extern void hrtimer_run_queues(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
/*
 * ktime_divns - divide a ktime value by a nanosecond value.
 *
 * When BITS_PER_LONG < 64 this is an out-of-line function (defined in
 * kernel/hrtimer.c). Otherwise it is a macro which divides the tv64
 * member directly and casts the quotient to unsigned long.
 */
#if BITS_PER_LONG < 64
extern unsigned long ktime_divns(const ktime_t kt, s64 div);
#else /* BITS_PER_LONG < 64 */
# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div))
#endif
#endif
......@@ -20,12 +20,79 @@ struct tick_device {
enum tick_device_mode mode;
};
/*
 * Dynamic tick operating mode, stored in tick_sched.nohz_mode.
 *
 * NOTE(review): the per-value meanings below are inferred from the names
 * and from the commit description (tick based vs. high resolution timer
 * based dynticks) - confirm against tick-sched.c.
 */
enum tick_nohz_mode {
NOHZ_MODE_INACTIVE,	/* presumably: dynticks not active */
NOHZ_MODE_LOWRES,	/* presumably: tick based dynticks */
NOHZ_MODE_HIGHRES,	/* presumably: high resolution timer based dynticks */
};
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
* @sched_timer: hrtimer to schedule the periodic tick in high
* resolution mode
* @check_clocks: NOTE(review): presumably the "check bit" which the
* clocksource / clock event notifications set and which
* tick_check_oneshot_change() evaluates - confirm in
* tick-sched.c
* @nohz_mode: Dynamic tick mode, one of enum tick_nohz_mode
* @idle_tick: Store the last idle tick expiry time when the tick
* timer is modified for idle sleeps. This is necessary
* to resume the tick timer operation in the timeline
* when the CPU returns from idle
* @tick_stopped: Indicator that the idle tick has been stopped
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_calls: Total number of idle calls
* @idle_sleeps: Number of idle calls, where the sched tick was stopped
* @idle_entrytime: Time when the idle call was entered
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @last_jiffies: NOTE(review): undocumented in the original; presumably
* the jiffies value sampled when the tick was last
* evaluated for stopping - confirm
* @next_jiffies: NOTE(review): undocumented in the original; presumably
* the jiffies value of the next expected timer event -
* confirm
* @idle_expires: NOTE(review): undocumented in the original; presumably
* the time at which the programmed idle sleep ends -
* confirm
*/
struct tick_sched {
struct hrtimer sched_timer;
unsigned long check_clocks;
enum tick_nohz_mode nohz_mode;
ktime_t idle_tick;
int tick_stopped;
unsigned long idle_jiffies;
unsigned long idle_calls;
unsigned long idle_sleeps;
ktime_t idle_entrytime;
ktime_t idle_sleeptime;
unsigned long last_jiffies;
unsigned long next_jiffies;
ktime_t idle_expires;
};
extern void __init tick_init(void);
extern int tick_is_oneshot_available(void);
# ifdef CONFIG_HIGH_RES_TIMERS
extern int tick_init_highres(void);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_setup_sched_timer(void);
extern void tick_cancel_sched_timer(int cpu);
# else
static inline void tick_cancel_sched_timer(int cpu) { }
# endif /* HIGHRES */
#else
# ifdef CONFIG_TICK_ONESHOT
extern void tick_clock_notify(void);
extern int tick_check_oneshot_change(int allow_nohz);
extern struct tick_sched *tick_get_tick_sched(int cpu);
# else
static inline void tick_clock_notify(void) { }
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
# endif
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_cancel_sched_timer(int cpu) { }
static inline void tick_clock_notify(void) { }
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
#endif
# ifdef CONFIG_NO_HZ
extern void tick_nohz_stop_sched_tick(void);
extern void tick_nohz_restart_sched_tick(void);
extern void tick_nohz_update_jiffies(void);
# else
static inline void tick_nohz_stop_sched_tick(void) { }
static inline void tick_nohz_restart_sched_tick(void) { }
static inline void tick_nohz_update_jiffies(void) { }
# endif /* !NO_HZ */
#endif
......@@ -2,8 +2,8 @@
* linux/kernel/hrtimer.c
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* High-resolution kernel timers
*
......@@ -38,6 +38,7 @@
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
#include <asm/uaccess.h>
......@@ -288,7 +289,7 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
/*
* Divide a ktime value by a nanosecond value
*/
static unsigned long ktime_divns(const ktime_t kt, s64 div)
unsigned long ktime_divns(const ktime_t kt, s64 div)
{
u64 dclc, inc, dns;
int sft = 0;
......@@ -305,9 +306,6 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div)
return (unsigned long) dclc;
}
#else /* BITS_PER_LONG < 64 */
# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div))
#endif /* BITS_PER_LONG >= 64 */
/*
......@@ -682,6 +680,16 @@ void hrtimer_run_queues(void)
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
int i;
/*
* This _is_ ugly: We have to check in the softirq context,
* whether we can switch to highres and / or nohz mode. The
* clocksource switch happens in the timer interrupt with
* xtime_lock held. Notification from there only sets the
* check bit in the tick_oneshot code, otherwise we might
* deadlock vs. xtime_lock.
*/
tick_check_oneshot_change(1);
hrtimer_get_softirq_time(cpu_base);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
......
......@@ -17,6 +17,7 @@
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>
#include <asm/irq.h>
/*
......@@ -278,9 +279,11 @@ EXPORT_SYMBOL(do_softirq);
*/
void irq_enter(void)
{
account_system_vtime(current);
add_preempt_count(HARDIRQ_OFFSET);
trace_hardirq_enter();
__irq_enter();
#ifdef CONFIG_NO_HZ
if (idle_cpu(smp_processor_id()))
tick_nohz_update_jiffies();
#endif
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
......@@ -299,6 +302,12 @@ void irq_exit(void)
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
tick_nohz_stop_sched_tick();
#endif
preempt_enable_no_resched();
}
......
#
# Timer subsystem related configuration options
#
config TICK_ONESHOT
bool
default n
config NO_HZ
bool "Tickless System (Dynamic Ticks)"
depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
help
This option enables a tickless system: timer interrupts will
only trigger on an as-needed basis both when the system is
busy and when the system is idle.
......@@ -3,3 +3,5 @@ obj-y += ntp.o clocksource.o jiffies.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
......@@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
......@@ -109,6 +110,13 @@ static void clocksource_watchdog(unsigned long data)
if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* We just marked the clocksource as
* highres-capable, notify the rest of the
* system as well so that we transition
* into high-res mode:
*/
tick_clock_notify();
}
cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = csnow;
......
......@@ -29,7 +29,7 @@
struct tick_device tick_broadcast_device;
static cpumask_t tick_broadcast_mask;
DEFINE_SPINLOCK(tick_broadcast_lock);
static DEFINE_SPINLOCK(tick_broadcast_lock);
/*
* Start the device in periodic mode
......@@ -215,6 +215,8 @@ static void tick_do_broadcast_on_off(void *why)
else {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
tick_broadcast_setup_oneshot(bc);
}
out:
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
......@@ -268,3 +270,190 @@ void tick_shutdown_broadcast(unsigned int *cpup)
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#ifdef CONFIG_TICK_ONESHOT
static cpumask_t tick_broadcast_oneshot_mask;
/*
 * Program the broadcast device for @expires.
 *
 * Returns the result of clockevents_program_event(). When @force is
 * non-zero a failed attempt is retried with an expiry of "now plus the
 * device's min_delta_ns" until programming succeeds, so in that case the
 * function only returns 0.
 */
static int tick_broadcast_set_event(ktime_t expires, int force)
{
	struct clock_event_device *bc_dev = tick_broadcast_device.evtdev;
	ktime_t now = ktime_get();
	int ret = clockevents_program_event(bc_dev, expires, now);

	while (ret && force) {
		/* Try again with the shortest delta the device accepts */
		now = ktime_get();
		expires = ktime_add(now, ktime_set(0, bc_dev->min_delta_ns));
		ret = clockevents_program_event(bc_dev, expires, now);
	}

	return ret;
}
/*
* Reprogram the broadcast device:
*
* Called with tick_broadcast_lock held and interrupts disabled.
*/
static int tick_broadcast_reprogram(void)
{
ktime_t expires = { .tv64 = KTIME_MAX };
struct tick_device *td;
int cpu;
/*
* Find the event which expires next:
*/
for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 < expires.tv64)
expires = td->evtdev->next_event;
}
if (expires.tv64 == KTIME_MAX)
return 0;
return tick_broadcast_set_event(expires, 0);
}
/*
 * Event handler of the broadcast device in oneshot mode.
 *
 * Takes tick_broadcast_lock, collects all CPUs of
 * tick_broadcast_oneshot_mask whose per cpu device event has expired and
 * hands that set to tick_do_broadcast().
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	cpumask_t expired;
	ktime_t now;
	int cpu;

	spin_lock(&tick_broadcast_lock);

	do {
		dev->next_event.tv64 = KTIME_MAX;
		expired = CPU_MASK_NONE;
		now = ktime_get();

		/* Collect every CPU whose local event is already due */
		cpu = first_cpu(tick_broadcast_oneshot_mask);
		while (cpu != NR_CPUS) {
			struct tick_device *td = &per_cpu(tick_cpu_device, cpu);

			if (td->evtdev->next_event.tv64 <= now.tv64)
				cpu_set(cpu, expired);
			cpu = next_cpu(cpu, tick_broadcast_oneshot_mask);
		}

		/*
		 * Wake up the CPUs with an expired event; the broadcast
		 * device is reprogrammed in the return from idle code.
		 *
		 * If the global event did not expire any CPU local event
		 * (this happens in dyntick mode, as the maximum PIT delta
		 * is quite small), reprogram the broadcast device here and
		 * rescan when the reprogramming reports a non-zero result.
		 */
	} while (!tick_do_broadcast(expired) && tick_broadcast_reprogram());

	spin_unlock(&tick_broadcast_lock);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
void tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
int cpu;
spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
goto out;
bc = tick_broadcast_device.evtdev;
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
goto out;
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_set(cpu, tick_broadcast_oneshot_mask);
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
if (dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(dev->next_event, 1);
}
} else {
if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_clear(cpu, tick_broadcast_oneshot_mask);
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
if (dev->next_event.tv64 != KTIME_MAX)
tick_program_event(dev->next_event, 1);
}
}
out:
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/**
 * tick_broadcast_setup_oneshot - setup the broadcast device for oneshot mode
 * @bc:	the broadcast clock event device
 *
 * Does nothing when @bc is already in CLOCK_EVT_MODE_ONESHOT. Otherwise
 * installs tick_handle_oneshot_broadcast() as the event handler, switches
 * the device to oneshot mode and resets its next_event to KTIME_MAX.
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	if (bc->mode == CLOCK_EVT_MODE_ONESHOT)
		return;

	bc->event_handler = tick_handle_oneshot_broadcast;
	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
	bc->next_event.tv64 = KTIME_MAX;
}
/*
 * Select oneshot operating mode for the broadcast device.
 *
 * The mode is recorded in tick_broadcast_device even when no broadcast
 * event device is installed; the device itself is only set up when one
 * is present.
 */
void tick_broadcast_switch_to_oneshot(void)
{
	unsigned long flags;

	spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	if (tick_broadcast_device.evtdev)
		tick_broadcast_setup_oneshot(tick_broadcast_device.evtdev);

	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
 * Remove a dead CPU from oneshot broadcasting.
 *
 * @cpup points to the number of the dead CPU. The CPU is cleared from
 * tick_broadcast_oneshot_mask; when the broadcast device operates in
 * oneshot mode and the mask became empty, the device is shut down.
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
	unsigned int dead_cpu = *cpup;
	struct clock_event_device *bc_dev;
	unsigned long flags;

	spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc_dev = tick_broadcast_device.evtdev;
	cpu_clear(dead_cpu, tick_broadcast_oneshot_mask);

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    bc_dev && cpus_empty(tick_broadcast_oneshot_mask))
		clockevents_set_mode(bc_dev, CLOCK_EVT_MODE_SHUTDOWN);

	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif
......@@ -34,6 +34,16 @@ ktime_t tick_period;
static int tick_do_timer_cpu = -1;
DEFINE_SPINLOCK(tick_device_lock);
/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 *
 * Looks at the tick device of the current CPU. Returns 1 when an event
 * device is installed and has CLOCK_EVT_FEAT_ONESHOT set, 0 otherwise.
 */
int tick_is_oneshot_available(void)
{
	struct clock_event_device *evt = __get_cpu_var(tick_cpu_device).evtdev;

	if (!evt)
		return 0;

	return (evt->features & CLOCK_EVT_FEAT_ONESHOT) != 0;
}
/*
* Periodic tick
*/
......@@ -162,6 +172,8 @@ static void tick_setup_device(struct tick_device *td,
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}
/*
......@@ -208,6 +220,12 @@ static int tick_check_new_device(struct clock_event_device *newdev)
* feature.
*/
if (curdev) {
/*
* Prefer one shot capable devices !
*/
if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
goto out_bc;
/*
* Check the rating
*/
......@@ -226,6 +244,8 @@ static int tick_check_new_device(struct clock_event_device *newdev)
}
clockevents_exchange_device(curdev, newdev);
tick_setup_device(td, newdev, cpu, cpumask);
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
spin_unlock_irqrestore(&tick_device_lock, flags);
return NOTIFY_STOP;
......@@ -285,7 +305,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
tick_broadcast_on_off(reason, dev);
break;
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DEAD:
tick_shutdown_broadcast_oneshot(dev);
tick_shutdown_broadcast(dev);
tick_shutdown(dev);
break;
......
......@@ -9,13 +9,58 @@ extern ktime_t tick_period;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
/*
* NO_HZ / high resolution timer shared code
*
* With CONFIG_TICK_ONESHOT the real implementations are declared; without
* it, inline stubs keep the callers compiling.
*/
#ifdef CONFIG_TICK_ONESHOT
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
# else /* BROADCAST */
/* No broadcast support: setting up a oneshot broadcast device is a bug */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
/* The remaining broadcast hooks are no-ops without broadcast support */
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
# endif /* !BROADCAST */
#else /* !ONESHOT */
/*
* NOTE(review): this branch has no stubs for tick_switch_to_oneshot() and
* tick_broadcast_switch_to_oneshot() - presumably they are only called
* from code which is built with CONFIG_TICK_ONESHOT; confirm.
*/
/* Oneshot setup must never be requested without oneshot support */
static inline
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt)
{
BUG();
}
/* Nothing is programmed; 0 is returned unconditionally */
static inline int tick_program_event(ktime_t expires, int force)
{
return 0;
}
static inline void tick_oneshot_notify(void) { }
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
#endif /* !TICK_ONESHOT */
/*
* Broadcasting support
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_do_broadcast(cpumask_t mask);
extern struct tick_device tick_broadcast_device;
extern spinlock_t tick_broadcast_lock;
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern int tick_check_broadcast_device(struct clock_event_device *dev);
......
/*
* linux/kernel/time/tick-oneshot.c
*
* This file contains functions which manage high resolution tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/irq.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include "tick-internal.h"
/**
* tick_program_event
*/
int tick_program_event(ktime_t expires, int force)