Commit abf917cd authored by Frederic Weisbecker's avatar Frederic Weisbecker
Browse files

cputime: Generic on-demand virtual cputime accounting



If we want to stop the tick further idle, we need to be
able to account the cputime without using the tick.

Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.

However implementing CONFIG_VIRT_CPU_ACCOUNTING require
low level hooks and involves more overhead. But we already
have a generic context tracking subsystem that is required
for RCU needs by archs which plan to shut down the tick
outside idle.

This patch implements a generic virtual based cputime
accounting that relies on these generic kernel/user hooks.

There are some upsides of doing this:

- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if context tracking is already built (already necessary for RCU in full
tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.

And one downside:

- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.
Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
parent ae8dda5c
......@@ -11,19 +11,19 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
* If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
* Otherwise we measure cpu time in jiffies using the generic definitions.
*/
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
#else
# include <asm/processor.h>
# include <asm-generic/cputime_nsecs.h>
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __IA64_CPUTIME_H */
......@@ -31,7 +31,7 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 ac_stamp;
__u64 ac_leave;
__u64 ac_stime;
......@@ -69,7 +69,7 @@ struct thread_info {
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->ac_stime = 0; \
......
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define XEN_ACCOUNT_GET_STAMP \
MOV_FROM_ITC(pUStk, p6, r20, r2);
......
......@@ -41,7 +41,7 @@ void foo(void)
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
......
......@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
#endif
.global __paravirt_work_processed_syscall;
__paravirt_work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
adds r2=PT(LOADRS)+16,r12
MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
......@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
......@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) st1 [r15]=r17 // M2|3
#else
(pUStk) st1 [r14]=r17 // M2|3
......@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
COVER // B add current frame into dirty partition & set cr.ifs
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
mov r19=ar.bsp // M2 get new backing store pointer
st8 [r14]=r22 // M save time at leave
mov f10=f0 // F clear f10
......@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
.pred.rel.mutex pUStk,pKStk
MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave
......@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
......@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
#endif
;;
......@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
// mib : mov add br -> mib : ld8 add br
// bbb_ : br nop cover;; mbb_ : mov br cover;;
......
......@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
#else
nop.m 0
......@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
......
......@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
sched_clock = ia64_native_sched_clock
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
GLOBAL_ENTRY(cycle_to_cputime)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
......@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(cycle_to_cputime)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU
......
......@@ -784,7 +784,7 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
;;
mov b6=r30 // I0 setup syscall handler branch reg early
#else
......@@ -801,7 +801,7 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting
#else
mov b6=r30 // I0 setup syscall handler branch reg early
......@@ -817,7 +817,7 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
......@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
DBG_FAULT(16)
FAULT(16)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
/*
* There is no particular reason for this code to be here, other than
* that there happens to be space here that would go unused otherwise.
......
......@@ -4,7 +4,7 @@
#include "entry.h"
#include "paravirt_inst.h"
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;
......
......@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/kernel_stat.h>
......@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(vtime_delta(tsk));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
......
CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=256
CONFIG_EXPERIMENTAL=y
......
CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y
......
CONFIG_PPC64=y
CONFIG_ALTIVEC=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y
......
......@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
* If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
* the same units as the timebase. Otherwise we measure cpu time
* in jiffies using the generic definitions.
*/
......@@ -16,7 +16,7 @@
#ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm-generic/cputime.h>
#ifdef __KERNEL__
static inline void setup_cputime_one_jiffy(void) { }
......@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
......@@ -145,7 +145,7 @@ struct dtl_entry {
extern struct kmem_cache *dtl_cache;
/*
* When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
* When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
* reading from the dispatch trace log. If other code wants to consume
* DTL entries, it can set this pointer to a function that will get
* called once for each DTL entry that gets processed.
......
......@@ -24,7 +24,7 @@
* user_time and system_time fields in the paca.
*/
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ra, rb)
#define ACCOUNT_STOLEN_TIME
......@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_PPC_SPLPAR */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Macros for storing registers into and loading registers from
......
......@@ -94,7 +94,7 @@ system_call_common:
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
/* if from user, see if there are any DTL entries to process */
......@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD
33:
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
/*
* A syscall should always be called with interrupts enabled
......
......@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factors for converting from cputime_t (timebase ticks) to
* jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
......@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
account_user_time(tsk, utime, utimescaled);
}
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#define calc_cputime_factors()
#endif
......
......@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
*/
static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct dtl_ring {
u64 write_index;
struct dtl_entry *write_ptr;
......@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
return per_cpu(dtl_rings, dtl->cpu).write_index;
}
#else /* CONFIG_VIRT_CPU_ACCOUNTING */
#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_start(struct dtl *dtl)
{
......@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
{
return lppaca_of(dtl->cpu).dtl_idx;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_enable(struct dtl *dtl)
{
......
......@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
struct kmem_cache *dtl_cache;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Allocate space for the dispatch trace log for all possible cpus
* and register the buffers with the hypervisor. This is used for
......@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int alloc_dispatch_log_kmem_cache(void)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment