Commit ca78f6ba authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq

* master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq:
  Move workqueue exports to where the functions are defined.
  [CPUFREQ] Misc cleanups in ondemand.
  [CPUFREQ] Make ondemand sampling per CPU and remove the mutex usage in sampling path.
  [CPUFREQ] Add queue_delayed_work_on() interface for workqueues.
  [CPUFREQ] Remove slowdown from ondemand sampling path.
parents 7ad7153b ae90dd5d
......@@ -12,22 +12,11 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
/*
......@@ -56,16 +45,15 @@ static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10)
#define TRANSITION_LATENCY_LIMIT (10 * 1000)
static void do_dbs_timer(void *data);
struct cpu_dbs_info_s {
cputime64_t prev_cpu_idle;
cputime64_t prev_cpu_wall;
struct cpufreq_policy *cur_policy;
unsigned int prev_cpu_idle_up;
unsigned int prev_cpu_idle_down;
struct work_struct work;
unsigned int enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
......@@ -80,31 +68,32 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
* cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
* is recursive for the same process. -Venki
*/
static DEFINE_MUTEX (dbs_mutex);
static DECLARE_WORK (dbs_work, do_dbs_timer, NULL);
static DEFINE_MUTEX(dbs_mutex);
static struct workqueue_struct *dbs_workq;
static struct workqueue_struct *kondemand_wq;
struct dbs_tuners {
unsigned int sampling_rate;
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int ignore_nice;
};
static struct dbs_tuners dbs_tuners_ins = {
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
.ignore_nice = 0,
};
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{
return kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
( dbs_tuners_ins.ignore_nice ?
kstat_cpu(cpu).cpustat.nice :
0);
cputime64_t retval;
retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
kstat_cpu(cpu).cpustat.iowait);
if (dbs_tuners_ins.ignore_nice)
retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
return retval;
}
/************************** sysfs interface ************************/
......@@ -133,35 +122,15 @@ static ssize_t show_##file_name \
return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
int ret;
ret = sscanf (buf, "%u", &input);
if (ret != 1 )
return -EINVAL;
if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
return -EINVAL;
mutex_lock(&dbs_mutex);
dbs_tuners_ins.sampling_down_factor = input;
mutex_unlock(&dbs_mutex);
return count;
}
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
int ret;
ret = sscanf (buf, "%u", &input);
ret = sscanf(buf, "%u", &input);
mutex_lock(&dbs_mutex);
if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
......@@ -180,7 +149,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused,
{
unsigned int input;
int ret;
ret = sscanf (buf, "%u", &input);
ret = sscanf(buf, "%u", &input);
mutex_lock(&dbs_mutex);
if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
......@@ -203,7 +172,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
unsigned int j;
ret = sscanf (buf, "%u", &input);
ret = sscanf(buf, "%u", &input);
if ( ret != 1 )
return -EINVAL;
......@@ -217,12 +186,12 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
}
dbs_tuners_ins.ignore_nice = input;
/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
/* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) {
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
struct cpu_dbs_info_s *dbs_info;
dbs_info = &per_cpu(cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
dbs_info->prev_cpu_wall = get_jiffies_64();
}
mutex_unlock(&dbs_mutex);
......@@ -234,7 +203,6 @@ static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)
define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
......@@ -242,7 +210,6 @@ static struct attribute * dbs_attributes[] = {
&sampling_rate_max.attr,
&sampling_rate_min.attr,
&sampling_rate.attr,
&sampling_down_factor.attr,
&up_threshold.attr,
&ignore_nice_load.attr,
NULL
......@@ -255,26 +222,27 @@ static struct attribute_group dbs_attr_group = {
/************************** sysfs end ************************/
static void dbs_check_cpu(int cpu)
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
unsigned int idle_ticks, up_idle_ticks, total_ticks;
unsigned int freq_next;
unsigned int freq_down_sampling_rate;
static int down_skip[NR_CPUS];
struct cpu_dbs_info_s *this_dbs_info;
unsigned int idle_ticks, total_ticks;
unsigned int load;
cputime64_t cur_jiffies;
struct cpufreq_policy *policy;
unsigned int j;
this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
if (!this_dbs_info->enable)
return;
policy = this_dbs_info->cur_policy;
cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
this_dbs_info->prev_cpu_wall);
this_dbs_info->prev_cpu_wall = cur_jiffies;
/*
* Every sampling_rate, we check, if current idle time is less
* than 20% (default), then we try to increase frequency
* Every sampling_rate*sampling_down_factor, we look for a the lowest
* Every sampling_rate, we look for a the lowest
* frequency which can sustain the load while keeping idle time over
* 30%. If such a frequency exist, we try to decrease to this frequency.
*
......@@ -283,36 +251,26 @@ static void dbs_check_cpu(int cpu)
* 5% (default) of current frequency
*/
/* Check for frequency increase */
/* Get Idle Time */
idle_ticks = UINT_MAX;
for_each_cpu_mask(j, policy->cpus) {
unsigned int tmp_idle_ticks, total_idle_ticks;
cputime64_t total_idle_ticks;
unsigned int tmp_idle_ticks;
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
total_idle_ticks = get_cpu_idle_time(j);
tmp_idle_ticks = total_idle_ticks -
j_dbs_info->prev_cpu_idle_up;
j_dbs_info->prev_cpu_idle_up = total_idle_ticks;
tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
j_dbs_info->prev_cpu_idle);
j_dbs_info->prev_cpu_idle = total_idle_ticks;
if (tmp_idle_ticks < idle_ticks)
idle_ticks = tmp_idle_ticks;
}
load = (100 * (total_ticks - idle_ticks)) / total_ticks;
/* Scale idle ticks by 100 and compare with up and down ticks */
idle_ticks *= 100;
up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
if (idle_ticks < up_idle_ticks) {
down_skip[cpu] = 0;
for_each_cpu_mask(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
j_dbs_info->prev_cpu_idle_down =
j_dbs_info->prev_cpu_idle_up;
}
/* Check for frequency increase */
if (load > dbs_tuners_ins.up_threshold) {
/* if we are already at full speed then break out early */
if (policy->cur == policy->max)
return;
......@@ -323,83 +281,49 @@ static void dbs_check_cpu(int cpu)
}
/* Check for frequency decrease */
down_skip[cpu]++;
if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
return;
idle_ticks = UINT_MAX;
for_each_cpu_mask(j, policy->cpus) {
unsigned int tmp_idle_ticks, total_idle_ticks;
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
/* Check for frequency decrease */
total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
tmp_idle_ticks = total_idle_ticks -
j_dbs_info->prev_cpu_idle_down;
j_dbs_info->prev_cpu_idle_down = total_idle_ticks;
if (tmp_idle_ticks < idle_ticks)
idle_ticks = tmp_idle_ticks;
}
down_skip[cpu] = 0;
/* if we cannot reduce the frequency anymore, break out early */
if (policy->cur == policy->min)
return;
/* Compute how many ticks there are between two measurements */
freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
dbs_tuners_ins.sampling_down_factor;
total_ticks = usecs_to_jiffies(freq_down_sampling_rate);
/*
* The optimal frequency is the frequency that is the lowest that
* can support the current CPU usage without triggering the up
* policy. To be safe, we focus 10 points under the threshold.
*/
freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
freq_next = (freq_next * policy->cur) /
if (load < (dbs_tuners_ins.up_threshold - 10)) {
unsigned int freq_next;
freq_next = (policy->cur * load) /
(dbs_tuners_ins.up_threshold - 10);
if (freq_next < policy->min)
freq_next = policy->min;
if (freq_next <= ((policy->cur * 95) / 100))
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
}
static void do_dbs_timer(void *data)
{
int i;
lock_cpu_hotplug();
mutex_lock(&dbs_mutex);
for_each_online_cpu(i)
dbs_check_cpu(i);
queue_delayed_work(dbs_workq, &dbs_work,
usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
mutex_unlock(&dbs_mutex);
unlock_cpu_hotplug();
unsigned int cpu = smp_processor_id();
struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
dbs_check_cpu(dbs_info);
queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
}
static inline void dbs_timer_init(void)
static inline void dbs_timer_init(unsigned int cpu)
{
INIT_WORK(&dbs_work, do_dbs_timer, NULL);
if (!dbs_workq)
dbs_workq = create_singlethread_workqueue("ondemand");
if (!dbs_workq) {
printk(KERN_ERR "ondemand: Cannot initialize kernel thread\n");
return;
}
queue_delayed_work(dbs_workq, &dbs_work,
usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
return;
}
static inline void dbs_timer_exit(void)
static inline void dbs_timer_exit(unsigned int cpu)
{
if (dbs_workq)
cancel_rearming_delayed_workqueue(dbs_workq, &dbs_work);
struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
cancel_rearming_delayed_workqueue(kondemand_wq, &dbs_info->work);
}
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
......@@ -413,8 +337,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
switch (event) {
case CPUFREQ_GOV_START:
if ((!cpu_online(cpu)) ||
(!policy->cur))
if ((!cpu_online(cpu)) || (!policy->cur))
return -EINVAL;
if (policy->cpuinfo.transition_latency >
......@@ -427,18 +350,26 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
break;
mutex_lock(&dbs_mutex);
dbs_enable++;
if (dbs_enable == 1) {
kondemand_wq = create_workqueue("kondemand");
if (!kondemand_wq) {
printk(KERN_ERR "Creation of kondemand failed\n");
dbs_enable--;
mutex_unlock(&dbs_mutex);
return -ENOSPC;
}
}
for_each_cpu_mask(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;
j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
j_dbs_info->prev_cpu_idle_down
= j_dbs_info->prev_cpu_idle_up;
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
j_dbs_info->prev_cpu_wall = get_jiffies_64();
}
this_dbs_info->enable = 1;
sysfs_create_group(&policy->kobj, &dbs_attr_group);
dbs_enable++;
/*
* Start the timerschedule work, when this governor
* is used for first time
......@@ -457,23 +388,20 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
def_sampling_rate = MIN_STAT_SAMPLING_RATE;
dbs_tuners_ins.sampling_rate = def_sampling_rate;
dbs_timer_init();
}
dbs_timer_init(policy->cpu);
mutex_unlock(&dbs_mutex);
break;
case CPUFREQ_GOV_STOP:
mutex_lock(&dbs_mutex);
dbs_timer_exit(policy->cpu);
this_dbs_info->enable = 0;
sysfs_remove_group(&policy->kobj, &dbs_attr_group);
dbs_enable--;
/*
* Stop the timerschedule work, when this governor
* is used for first time
*/
if (dbs_enable == 0)
dbs_timer_exit();
destroy_workqueue(kondemand_wq);
mutex_unlock(&dbs_mutex);
......@@ -483,13 +411,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
lock_cpu_hotplug();
mutex_lock(&dbs_mutex);
if (policy->max < this_dbs_info->cur_policy->cur)
__cpufreq_driver_target(
this_dbs_info->cur_policy,
policy->max, CPUFREQ_RELATION_H);
__cpufreq_driver_target(this_dbs_info->cur_policy,
policy->max,
CPUFREQ_RELATION_H);
else if (policy->min > this_dbs_info->cur_policy->cur)
__cpufreq_driver_target(
this_dbs_info->cur_policy,
policy->min, CPUFREQ_RELATION_L);
__cpufreq_driver_target(this_dbs_info->cur_policy,
policy->min,
CPUFREQ_RELATION_L);
mutex_unlock(&dbs_mutex);
unlock_cpu_hotplug();
break;
......@@ -498,9 +426,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
}
static struct cpufreq_governor cpufreq_gov_dbs = {
.name = "ondemand",
.governor = cpufreq_governor_dbs,
.owner = THIS_MODULE,
.name = "ondemand",
.governor = cpufreq_governor_dbs,
.owner = THIS_MODULE,
};
static int __init cpufreq_gov_dbs_init(void)
......@@ -510,21 +438,15 @@ static int __init cpufreq_gov_dbs_init(void)
static void __exit cpufreq_gov_dbs_exit(void)
{
/* Make sure that the scheduled work is indeed not running.
Assumes the timer has been cancelled first. */
if (dbs_workq) {
flush_workqueue(dbs_workq);
destroy_workqueue(dbs_workq);
}
cpufreq_unregister_governor(&cpufreq_gov_dbs);
}
MODULE_AUTHOR ("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION ("'cpufreq_ondemand' - A dynamic cpufreq governor for "
"Low Latency Frequency Transition capable processors");
MODULE_LICENSE ("GPL");
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);
......@@ -24,7 +24,9 @@ typedef u64 cputime64_t;
#define cputime64_zero (0ULL)
#define cputime64_add(__a, __b) ((__a) + (__b))
#define cputime64_sub(__a, __b) ((__a) - (__b))
#define cputime64_to_jiffies64(__ct) (__ct)
#define jiffies64_to_cputime64(__jif) (__jif)
#define cputime_to_cputime64(__ct) ((u64) __ct)
......
......@@ -63,6 +63,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq);
extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay));
extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay);
extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
extern int FASTCALL(schedule_work(struct work_struct *work));
......
......@@ -114,6 +114,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
put_cpu();
return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
static void delayed_work_timer_fn(unsigned long __data)
{
......@@ -147,6 +148,29 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
}
return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work);
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)
{
int ret = 0;
struct timer_list *timer = &work->timer;
if (!test_and_set_bit(0, &work->pending)) {
BUG_ON(timer_pending(timer));
BUG_ON(!list_empty(&work->entry));
/* This stores wq for the moment, for the timer_fn */
work->wq_data = wq;
timer->expires = jiffies + delay;
timer->data = (unsigned long)work;
timer->function = delayed_work_timer_fn;
add_timer_on(timer, cpu);
ret = 1;
}
return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
......@@ -281,6 +305,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
unlock_cpu_hotplug();
}
}
EXPORT_SYMBOL_GPL(flush_workqueue);
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
int cpu)
......@@ -358,6 +383,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
}
return wq;
}
EXPORT_SYMBOL_GPL(__create_workqueue);
static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
......@@ -395,6 +421,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
free_percpu(wq->cpu_wq);
kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);
static struct workqueue_struct *keventd_wq;
......@@ -402,31 +429,20 @@ int fastcall schedule_work(struct work_struct *work)
{
return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);
int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
{
return queue_delayed_work(keventd_wq, work, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);
int schedule_delayed_work_on(int cpu,
struct work_struct *work, unsigned long delay)
{
int ret = 0;
struct timer_list *timer = &work->timer;
if (!test_and_set_bit(0, &work->pending)) {
BUG_ON(timer_pending(timer));
BUG_ON(!list_empty(&work->entry));
/* This stores keventd_wq for the moment, for the timer_fn */
work->wq_data = keventd_wq;
timer->expires = jiffies + delay;
timer->data = (unsigned long)work;
timer->function = delayed_work_timer_fn;
add_timer_on(timer, cpu);
ret = 1;
}
return ret;
return queue_delayed_work_on(cpu, keventd_wq, work, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);
/**
* schedule_on_each_cpu - call a function on each online CPU from keventd
......@@ -463,6 +479,7 @@ void flush_scheduled_work(void)
{
flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);
/**
* cancel_rearming_delayed_workqueue - reliably kill off a delayed
......@@ -619,13 +636,3 @@ void init_workqueues(void)
BUG_ON(!keventd_wq);
}
EXPORT_SYMBOL_GPL(__create_workqueue);
EXPORT_SYMBOL_GPL(queue_work);
EXPORT_SYMBOL_GPL(queue_delayed_work);
EXPORT_SYMBOL_GPL(flush_workqueue);
EXPORT_SYMBOL_GPL(destroy_workqueue);
EXPORT_SYMBOL(schedule_work);
EXPORT_SYMBOL(schedule_delayed_work);
EXPORT_SYMBOL(schedule_delayed_work_on);
EXPORT_SYMBOL(flush_scheduled_work);