Commit b2c77a57 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'full-dynticks-cputime-for-mingo' of...

Merge tag 'full-dynticks-cputime-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks

 into sched/core

Pull full-dynticks (user-space execution is undisturbed and
receives no timer IRQs) preparation changes that convert the
cputime accounting code to be full-dynticks ready,
from Frederic Weisbecker:

 "This implements the cputime accounting on full dynticks CPUs.

  Typical cputime stats infrastructure relies on the timer tick and
  its periodic polling on the CPU to account the amount of time
  spent by the CPUs and the tasks per high level domains such as
  userspace, kernelspace, guest, ...

  Now we are preparing to implement full dynticks capability on
  Linux for Real Time and HPC users who want full CPU isolation.
  This feature requires a cputime accounting that doesn't depend
  on the timer tick.

  To implement it, this new cputime infrastructure plugs into
  kernel/user/guest boundaries to take snapshots of cputime and
  flush these to the stats when needed. This performs pretty
  much like CONFIG_VIRT_CPU_ACCOUNTING except that context location
  and cputime snaphots are synchronized between write and read
  side such that the latter can safely retrieve the pending tickless
  cputime of a task and add it to its latest cputime snapshot to
  return the correct result to the user."
Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents c3c18640 6a61671b
......@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
struct kmem_cache *dtl_cache;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Allocate space for the dispatch trace log for all possible cpus
* and register the buffers with the hypervisor. This is used for
......@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int alloc_dispatch_log_kmem_cache(void)
{
......
......@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
void vtime_account(struct task_struct *tsk)
void vtime_account_irq_enter(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
......@@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
virt_timer_forward(system);
}
EXPORT_SYMBOL_GPL(vtime_account);
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account")));
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);
void __kprobes vtime_stop_cpu(void)
......
......@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
cputime_t stime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
......@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
task_cputime(current, NULL, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
last_jiffies = jiffies;
last_stime = current->stime;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
idle_percentage = current->stime - last_stime;
idle_percentage = stime - last_stime;
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
if (apm_info.forbid_idle)
use_apm_idle = 0;
last_jiffies = jiffies;
last_stime = current->stime;
}
last_jiffies = jiffies;
last_stime = stime;
bucket = IDLE_LEAKY_MAX;
while (!need_resched()) {
......
......@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/mISDNif.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include "core.h"
static u_int *debug;
......@@ -202,6 +203,9 @@ static int
mISDNStackd(void *data)
{
struct mISDNstack *st = data;
#ifdef MISDN_MSG_STATS
cputime_t utime, stime;
#endif
int err = 0;
sigfillset(&current->blocked);
......@@ -303,9 +307,10 @@ mISDNStackd(void *data)
"msg %d sleep %d stopped\n",
dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
st->stopped_cnt);
task_cputime(st->thread, &utime, &stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
dev_name(&st->dev->dev), utime, stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
......
......@@ -33,6 +33,7 @@
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
......@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_to_timeval(p->utime, &prstatus->pr_utime);
cputime_to_timeval(p->stime, &prstatus->pr_stime);
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
......
......@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_to_timeval(p->utime, &prstatus->pr_utime);
cputime_to_timeval(p->stime, &prstatus->pr_stime);
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
......
......@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += t->gtime;
gtime += task_gtime(t);
t = next_thread(t);
} while (t != task);
......@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt = task->min_flt;
maj_flt = task->maj_flt;
task_cputime_adjusted(task, &utime, &stime);
gtime = task->gtime;
gtime = task_gtime(task);
}
/* scale priority and nice values from timeslices to -20..20 */
......
......@@ -4,66 +4,12 @@
#include <linux/time.h>
#include <linux/jiffies.h>
typedef unsigned long __nocast cputime_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
typedef u64 __nocast cputime64_t;
#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
#define nsecs_to_cputime64(__ct) \
jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
/*
* Convert cputime to microseconds and back.
*/
#define cputime_to_usecs(__ct) \
jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
/*
* Convert cputime to seconds and back.
*/
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
/*
* Convert cputime to timespec and back.
*/
#define timespec_to_cputime(__val) \
jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to timeval and back.
*/
#define timeval_to_cputime(__val) \
jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to clock and back.
*/
#define cputime_to_clock_t(__ct) \
jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
jiffies_to_cputime(clock_t_to_jiffies(__x))
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
# include <asm-generic/cputime_jiffies.h>
#endif
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# include <asm-generic/cputime_nsecs.h>
#endif
#endif
#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
#define _ASM_GENERIC_CPUTIME_JIFFIES_H
typedef unsigned long __nocast cputime_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
typedef u64 __nocast cputime64_t;
#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
/*
* Convert nanoseconds to cputime
*/
#define nsecs_to_cputime64(__nsec) \
jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
#define nsecs_to_cputime(__nsec) \
jiffies_to_cputime(nsecs_to_jiffies(__nsec))
/*
* Convert cputime to microseconds and back.
*/
#define cputime_to_usecs(__ct) \
jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
/*
* Convert cputime to seconds and back.
*/
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
/*
* Convert cputime to timespec and back.
*/
#define timespec_to_cputime(__val) \
jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to timeval and back.
*/
#define timeval_to_cputime(__val) \
jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to clock and back.
*/
#define cputime_to_clock_t(__ct) \
jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
jiffies_to_cputime(clock_t_to_jiffies(__x))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
#endif
/*
* Definitions for measuring cputime in nsecs resolution.
*
* Based on <arch/ia64/include/asm/cputime.h>
*
* Copyright (C) 2007 FUJITSU LIMITED
* Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
#define _ASM_GENERIC_CPUTIME_NSECS_H
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
/*
* Convert cputime <-> jiffies (HZ)
*/
#define cputime_to_jiffies(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__jif) \
(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) \
(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> nanoseconds
*/
#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
/*
* Convert cputime <-> microseconds
*/
#define cputime_to_usecs(__ct) \
((__force u64)(__ct) / NSEC_PER_USEC)
#define usecs_to_cputime(__usecs) \
(__force cputime_t)((__usecs) * NSEC_PER_USEC)
#define usecs_to_cputime64(__usecs) \
(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
/*
* Convert cputime <-> seconds
*/
#define cputime_to_secs(__ct) \
((__force u64)(__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) \
(__force cputime_t)((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
return (__force cputime_t) ret;
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
}
/*
* Convert cputime <-> timeval (msec)
*/
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
return (__force cputime_t) ret;
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
#define cputime_to_clock_t(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) \
(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)
#endif
......@@ -3,12 +3,40 @@
#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>
#include <linux/percpu.h>
struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum {
IN_KERNEL = 0,
IN_USER,
} state;
};
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_in_user(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}
static inline bool context_tracking_active(void)
{
return __this_cpu_read(context_tracking.active);
}
extern void user_enter(void);
extern void user_exit(void);
extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
......
......@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
vtime_account_irq_enter(current); \
account_irq_enter_time(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
......@@ -169,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
vtime_account_irq_exit(current); \
account_irq_exit_time(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)
......
......@@ -10,6 +10,7 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <net/net_namespace.h>
#ifdef CONFIG_SMP
......@@ -141,6 +142,15 @@ extern struct task_group root_task_group;
# define INIT_PERF_EVENTS(tsk)
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
.vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS,
#else
# define INIT_VTIME(tsk)
#endif
#define INIT_TASK_COMM "swapper"
/*
......@@ -210,6 +220,7 @@ extern struct task_group root_task_group;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ \
INIT_VTIME(tsk) \
}
......
......@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
......
......@@ -22,6 +22,7 @@
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <linux/err.h>
#include <linux/irqflags.h>
#include <asm/signal.h>
#include <linux/kvm.h>
......@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
}
#endif /* CONFIG_IOMMU_API */
static inline void kvm_guest_enter(void)
static inline void __guest_enter(void)
{
BUG_ON(preemptible());
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
vtime_account_system(current);
current->flags |= PF_VCPU;
}
static inline void __guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}
#ifdef CONFIG_CONTEXT_TRACKING
extern void guest_enter(void);
extern void guest_exit(void);
#else /* !CONFIG_CONTEXT_TRACKING */
static inline void guest_enter(void)
{
__guest_enter();
}
static inline void guest_exit(void)
{
__guest_exit();
}
#endif /* !CONFIG_CONTEXT_TRACKING */
static inline void kvm_guest_enter(void)
{
unsigned long flags;
BUG_ON(preemptible());
local_irq_save(flags);
guest_enter();
local_irq_restore(flags);
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspase from rcu point of view. In
......@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
static inline void kvm_guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
current->flags &= ~PF_VCPU;
unsigned long flags;
local_irq_save(flags);
guest_exit();
local_irq_restore(flags);
}
/*
......
......@@ -1368,6 +1368,15 @@ struct task_struct {
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock;
unsigned long long vtime_snap;
enum {
VTIME_SLEEPING = 0,
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
......@@ -1793,6 +1802,37 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}