Commit 9f0c1e56, authored by Peter Zijlstra, committed by Ingo Molnar

sched: rt-group: interface

Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.

Extend the /sys/kernel/uids/<uid>/ interface to allow setting
the group's rt_runtime.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 23b0fdfc
Real-Time group scheduling.
The problem space:
In order to schedule multiple groups of realtime tasks each group must
be assigned a fixed portion of the CPU time available. Without a minimum
guarantee a realtime group can obviously fall short. A fuzzy upper limit
is of no use since it cannot be relied upon. Which leaves us with just
the single fixed portion.
CPU time is divided by means of specifying how much time can be spent
running in a given period. Say a frame fixed realtime renderer must
deliver 25 frames a second, which yields a period of 0.04s. Now say
it will also have to play some music and respond to input, leaving it
with around 80% for the graphics. We can then give this group a runtime
of 0.8 * 0.04s = 0.032s.
This way the graphics group will have a 0.04s period with a 0.032s runtime
limit.
Now if the audio thread needs to refill the DMA buffer every 0.005s, but
needs only about 3% CPU time to do so, it can do with a runtime of
0.03 * 0.005s = 0.00015s.
The Interface:
system wide:
/proc/sys/kernel/sched_rt_period_us
/proc/sys/kernel/sched_rt_runtime_us
CONFIG_FAIR_USER_SCHED
/sys/kernel/uids/<uid>/cpu_rt_runtime
or
CONFIG_FAIR_CGROUP_SCHED
/cgroup/<cgroup>/cpu.rt_runtime_us
[ time is specified in us because the interface is s32; this gives an
operating range of ~35m to 1us ]
The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
A runtime of -1 specifies runtime == period, ie. no limit.
New groups get the period from /proc/sys/kernel/sched_rt_period_us and
a runtime of 0.
Settings are constrained to:
\Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
in order to keep the configuration schedulable.
......@@ -1541,8 +1541,6 @@ extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_rt_period;
extern unsigned int sysctl_sched_rt_ratio;
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
extern unsigned int sysctl_sched_min_bal_int_shares;
extern unsigned int sysctl_sched_max_bal_int_shares;
......@@ -1552,6 +1550,8 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
loff_t *ppos);
#endif
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
extern unsigned int sysctl_sched_compat_yield;
......@@ -2036,6 +2036,9 @@ extern void sched_destroy_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern unsigned long sched_group_shares(struct task_group *tg);
extern int sched_group_set_rt_runtime(struct task_group *tg,
long rt_runtime_us);
extern long sched_group_rt_runtime(struct task_group *tg);
#endif
......
......@@ -176,7 +176,7 @@ struct task_group {
struct sched_rt_entity **rt_se;
struct rt_rq **rt_rq;
unsigned int rt_ratio;
u64 rt_runtime;
/*
* shares assigned to a task group governs how much of cpu bandwidth
......@@ -642,19 +642,21 @@ const_debug unsigned int sysctl_sched_features =
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
* period over which we measure -rt task cpu usage in ms.
* period over which we measure -rt task cpu usage in us.
* default: 1s
*/
const_debug unsigned int sysctl_sched_rt_period = 1000;
unsigned int sysctl_sched_rt_period = 1000000;
#define SCHED_RT_FRAC_SHIFT 16
#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
*/
int sysctl_sched_rt_runtime = 950000;
/*
* ratio of time -rt tasks may consume.
* default: 95%
* single value that denotes runtime == period, ie unlimited time.
*/
const_debug unsigned int sysctl_sched_rt_ratio = 62259;
#define RUNTIME_INF ((u64)~0ULL)
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
......@@ -7187,7 +7189,8 @@ void __init sched_init(void)
&per_cpu(init_cfs_rq, i),
&per_cpu(init_sched_entity, i), i, 1);
init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
init_task_group.rt_runtime =
sysctl_sched_rt_runtime * NSEC_PER_USEC;
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
init_tg_rt_entry(rq, &init_task_group,
&per_cpu(init_rt_rq, i),
......@@ -7583,7 +7586,7 @@ struct task_group *sched_create_group(void)
goto err;
tg->shares = NICE_0_LOAD;
tg->rt_ratio = 0; /* XXX */
tg->rt_runtime = 0;
for_each_possible_cpu(i) {
rq = cpu_rq(i);
......@@ -7785,30 +7788,76 @@ unsigned long sched_group_shares(struct task_group *tg)
}
/*
* Ensure the total rt_ratio <= sysctl_sched_rt_ratio
* Ensure that the real time constraints are schedulable.
*/
int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
static DEFINE_MUTEX(rt_constraints_mutex);
/*
 * Convert @runtime out of @period into a fixed-point ratio with 16
 * fractional bits, i.e. (runtime << 16) / period.
 *
 * RUNTIME_INF denotes runtime == period and therefore maps to 1 << 16.
 * @period is used as the divisor, so callers must pass a non-zero value.
 */
static unsigned long to_ratio(u64 period, u64 runtime)
{
	if (runtime == RUNTIME_INF)
		return 1ULL << 16;

	/*
	 * div64_64() hands the quotient back as its return value; it does not
	 * update its first argument, so the result has to be returned here.
	 */
	return div64_64(runtime << 16, period);
}
/*
 * Check whether giving @tg the proposed @runtime (out of @period) keeps the
 * summed ratios of all task groups below the global ratio derived from
 * sysctl_sched_rt_period and sysctl_sched_rt_runtime.
 *
 * NOTE(review): this span comes from a rendered diff whose +/- markers were
 * lost. The first list walk, the rt_ratio comparison and assignment, and the
 * "return 0" appear to be removed-side lines of the old rt_ratio code
 * interleaved with the new body - confirm against the real tree.
 */
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
struct task_group *tgi;
unsigned long total = 0;
/* A negative global runtime sysctl is treated as RUNTIME_INF. */
unsigned long global_ratio =
to_ratio(sysctl_sched_rt_period,
sysctl_sched_rt_runtime < 0 ?
RUNTIME_INF : sysctl_sched_rt_runtime);
rcu_read_lock();
list_for_each_entry_rcu(tgi, &task_groups, list)
total += tgi->rt_ratio;
rcu_read_unlock();
list_for_each_entry_rcu(tgi, &task_groups, list) {
/* Skip @tg's current setting; its proposed runtime is added at the end. */
if (tgi == tg)
continue;
if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
return -EINVAL;
total += to_ratio(period, tgi->rt_runtime);
}
rcu_read_unlock();
tg->rt_ratio = rt_ratio;
return 0;
/* Non-zero only while the new total stays strictly below the global ratio. */
return total + to_ratio(period, runtime) < global_ratio;
}
unsigned long sched_group_rt_ratio(struct task_group *tg)
/*
 * Set the realtime runtime of @tg from @rt_runtime_us (microseconds).
 *
 * The value is converted to nanoseconds. -1 is checked for schedulability as
 * "runtime equals the period" and then stored as RUNTIME_INF. The check and
 * the store both happen under rt_constraints_mutex.
 *
 * Returns 0 on success, or -EINVAL when __rt_schedulable() rejects the value.
 *
 * NOTE(review): the "return tg->rt_ratio;" statement below looks like the
 * body of the removed sched_group_rt_ratio() left behind by the diff
 * rendering - confirm against the real tree.
 */
int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
return tg->rt_ratio;
u64 rt_runtime, rt_period;
int err = 0;
rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
/* For the schedulability test, "no limit" counts as a full period. */
if (rt_runtime_us == -1)
rt_runtime = rt_period;
mutex_lock(&rt_constraints_mutex);
if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
err = -EINVAL;
goto unlock;
}
/* Once accepted, "no limit" is stored as the RUNTIME_INF sentinel. */
if (rt_runtime_us == -1)
rt_runtime = RUNTIME_INF;
tg->rt_runtime = rt_runtime;
unlock:
mutex_unlock(&rt_constraints_mutex);
return err;
}
/*
 * Report the realtime runtime of @tg in microseconds.
 *
 * Returns -1 when the group's runtime is RUNTIME_INF, otherwise the stored
 * nanosecond value divided by NSEC_PER_USEC.
 */
long sched_group_rt_runtime(struct task_group *tg)
{
	u64 usecs = tg->rt_runtime;

	if (usecs != RUNTIME_INF) {
		/* do_div() divides in place, leaving the quotient in usecs. */
		do_div(usecs, NSEC_PER_USEC);
		return usecs;
	}

	return -1;
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_CGROUP_SCHED
......@@ -7884,17 +7933,49 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
return (u64) tg->shares;
}
static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
u64 rt_ratio_val)
/*
 * Write handler for the cgroup "rt_runtime_us" file: copy the user buffer,
 * parse it as a signed integer and pass the value to
 * sched_group_set_rt_runtime() for the cgroup's task group.
 *
 * Returns nbytes on success. Returns -EINVAL for an empty write or trailing
 * non-numeric characters, -E2BIG when the input does not fit the 64-byte
 * local buffer, -EFAULT when the copy from user space fails, and otherwise
 * the error from sched_group_set_rt_runtime().
 *
 * NOTE(review): the "return sched_group_set_rt_ratio(...)" statement below
 * looks like the body of the removed cpu_rt_ratio_write_uint() left behind by
 * the diff rendering - confirm against the real tree.
 */
static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
char buffer[64];
int retval = 0;
s64 val;
char *end;
if (!nbytes)
return -EINVAL;
/* Leave room for the terminating NUL written below. */
if (nbytes >= sizeof(buffer))
return -E2BIG;
if (copy_from_user(buffer, userbuf, nbytes))
return -EFAULT;
buffer[nbytes] = 0; /* nul-terminate */
/* strip newline if necessary */
if (nbytes && (buffer[nbytes-1] == '\n'))
buffer[nbytes-1] = 0;
/* Base 0: the prefix of the input selects the radix. */
val = simple_strtoll(buffer, &end, 0);
/* Anything left after the number makes the write invalid. */
if (*end)
return -EINVAL;
/* Pass to subsystem */
retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
if (!retval)
retval = nbytes;
return retval;
}
static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
/*
 * Read handler for the cgroup "rt_runtime_us" file: format the value returned
 * by sched_group_rt_runtime() as a signed decimal followed by a newline and
 * copy it out with simple_read_from_buffer().
 *
 * NOTE(review): the "tg" local and the "return (u64) tg->rt_ratio;" statement
 * look like lines of the removed cpu_rt_ratio_read_uint() left behind by the
 * diff rendering - confirm against the real tree.
 */
static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos)
{
struct task_group *tg = cgroup_tg(cgrp);
char tmp[64];
long val = sched_group_rt_runtime(cgroup_tg(cgrp));
int len = sprintf(tmp, "%ld\n", val);
return (u64) tg->rt_ratio;
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static struct cftype cpu_files[] = {
......@@ -7904,9 +7985,9 @@ static struct cftype cpu_files[] = {
.write_uint = cpu_shares_write_uint,
},
{
.name = "rt_ratio",
.read_uint = cpu_rt_ratio_read_uint,
.write_uint = cpu_rt_ratio_write_uint,
.name = "rt_runtime_us",
.read = cpu_rt_runtime_read,
.write = cpu_rt_runtime_write,
},
};
......
......@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
#ifdef CONFIG_FAIR_GROUP_SCHED
static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
/*
 * Runtime budget that applies to @rt_rq: RUNTIME_INF when the runqueue has no
 * task group, otherwise the rt_runtime of its group.
 *
 * NOTE(review): the "return SCHED_RT_FRAC;" and "return rt_rq->tg->rt_ratio;"
 * statements look like removed-side lines of the old sched_rt_ratio() left
 * behind by the diff rendering - confirm against the real tree.
 */
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
if (!rt_rq->tg)
return SCHED_RT_FRAC;
return RUNTIME_INF;
return rt_rq->tg->rt_ratio;
return rt_rq->tg->rt_runtime;
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
......@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
struct sched_rt_entity *rt_se = rt_rq->rt_se;
......@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
}
}
static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
struct sched_rt_entity *rt_se = rt_rq->rt_se;
......@@ -129,9 +129,12 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
#else
static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
/*
 * Variant in the #else branch of CONFIG_FAIR_GROUP_SCHED: the budget comes
 * from sysctl_sched_rt_runtime, converted from microseconds to nanoseconds.
 * A sysctl value of -1 yields RUNTIME_INF.
 *
 * NOTE(review): the "return sysctl_sched_rt_ratio;" statement looks like the
 * body of the removed sched_rt_ratio() left behind by the diff rendering -
 * confirm against the real tree.
 */
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
return sysctl_sched_rt_ratio;
if (sysctl_sched_rt_runtime == -1)
return RUNTIME_INF;
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
......@@ -158,11 +161,11 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
return NULL;
}
static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
/*
 * Empty stub: nothing is done for @rt_rq here.
 * NOTE(review): presumably the variant used when CONFIG_FAIR_GROUP_SCHED is
 * not set - the enclosing #else is outside this hunk, confirm.
 */
static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
}
static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
/*
 * Empty stub: nothing is done for @rt_rq here.
 * NOTE(review): presumably the variant used when CONFIG_FAIR_GROUP_SCHED is
 * not set - the enclosing #else is outside this hunk, confirm.
 */
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}
......@@ -184,28 +187,24 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
return rt_task_of(rt_se)->prio;
}
static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
unsigned int rt_ratio = sched_rt_ratio(rt_rq);
u64 period, ratio;
u64 runtime = sched_rt_runtime(rt_rq);
if (rt_ratio == SCHED_RT_FRAC)
if (runtime == RUNTIME_INF)
return 0;
if (rt_rq->rt_throttled)
return rt_rq_throttled(rt_rq);
period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
if (rt_rq->rt_time > ratio) {
if (rt_rq->rt_time > runtime) {
struct rq *rq = rq_of_rt_rq(rt_rq);
rq->rt_throttled = 1;
rt_rq->rt_throttled = 1;
if (rt_rq_throttled(rt_rq)) {
sched_rt_ratio_dequeue(rt_rq);
sched_rt_rq_dequeue(rt_rq);
return 1;
}
}
......@@ -219,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq)
u64 period;
while (rq->clock > rq->rt_period_expire) {
period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
rq->rt_period_expire += period;
for_each_leaf_rt_rq(rt_rq, rq) {
unsigned long rt_ratio = sched_rt_ratio(rt_rq);
u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
u64 runtime = sched_rt_runtime(rt_rq);
rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
if (rt_rq->rt_throttled) {
rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
rt_rq->rt_throttled = 0;
sched_rt_ratio_enqueue(rt_rq);
sched_rt_rq_enqueue(rt_rq);
}
}
......@@ -262,12 +260,7 @@ static void update_curr_rt(struct rq *rq)
cpuacct_charge(curr, delta_exec);
rt_rq->rt_time += delta_exec;
/*
* might make it a tad more accurate:
*
* update_sched_rt_period(rq);
*/
if (sched_rt_ratio_exceeded(rt_rq))
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
}
......
......@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_period_ms",
.data = &sysctl_sched_rt_period,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_ratio",
.data = &sysctl_sched_rt_ratio,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
{
.ctl_name = CTL_UNNUMBERED,
......@@ -346,6 +330,22 @@ static struct ctl_table kern_table[] = {
},
#endif
#endif
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_runtime_us",
.data = &sysctl_sched_rt_runtime,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
......
......@@ -164,9 +164,37 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
static struct kobj_attribute cpu_share_attr =
__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
/*
 * sysfs show handler: print the realtime runtime (in microseconds) of the
 * task group belonging to the user whose kobject is @kobj.
 *
 * sched_group_rt_runtime() returns a signed long and uses -1 for "no limit",
 * so the value is printed with a signed conversion.
 *
 * Returns the number of characters written to @buf.
 */
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
}
/*
 * sysfs store handler: parse a signed runtime in microseconds from @buf and
 * apply it to the task group of the user whose kobject is @kobj.
 *
 * The value is parsed as a signed long because sched_group_set_rt_runtime()
 * takes a long and treats -1 as "no limit".
 *
 * Returns @size on success, -EINVAL when @buf does not start with a number,
 * otherwise the error returned by sched_group_set_rt_runtime().
 */
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	long rt_runtime;
	int rc;

	/* Reject unparsable input instead of using an uninitialised value. */
	if (sscanf(buf, "%ld", &rt_runtime) != 1)
		return -EINVAL;

	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);

	return (rc ? rc : size);
}
/*
 * Attribute named "cpu_rt_runtime" with mode 0644, backed by
 * cpu_rt_runtime_show() for reads and cpu_rt_runtime_store() for writes.
 */
static struct kobj_attribute cpu_rt_runtime_attr =
__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
/*
 * default attributes per uid directory: the cpu_share and cpu_rt_runtime
 * attributes, terminated by a NULL entry.
 */
static struct attribute *uids_attributes[] = {
&cpu_share_attr.attr,
&cpu_rt_runtime_attr.attr,
NULL
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment