Commit 0356dbb7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq

parents e28cc715 95235ca2
......@@ -27,6 +27,7 @@ Contents:
2.2 Powersave
2.3 Userspace
2.4 Ondemand
2.5 Conservative
3. The Governor Interface in the CPUfreq Core
......@@ -110,9 +111,64 @@ directory.
The CPUfreq govenor "ondemand" sets the CPU depending on the
current usage. To do this the CPU must have the capability to
switch the frequency very fast.
switch the frequency very quickly. There are a number of sysfs file
accessible parameters:
sampling_rate: measured in uS (10^-6 seconds), this is how often you
want the kernel to look at the CPU usage and to make decisions on
what to do about the frequency. Typically this is set to values of
around '10000' or more.
show_sampling_rate_(min|max): the minimum and maximum sampling rates
available that you may set 'sampling_rate' to.
up_threshold: defines what the average CPU usaged between the samplings
of 'sampling_rate' needs to be for the kernel to make a decision on
whether it should increase the frequency. For example when it is set
to its default value of '80' it means that between the checking
intervals the CPU needs to be on average more than 80% in use to then
decide that the CPU frequency needs to be increased.
sampling_down_factor: this parameter controls the rate that the CPU
makes a decision on when to decrease the frequency. When set to its
default value of '5' it means that at 1/5 the sampling_rate the kernel
makes a decision to lower the frequency. Five "lower rate" decisions
have to be made in a row before the CPU frequency is actually lower.
If set to '1' then the frequency decreases as quickly as it increases,
if set to '2' it decreases at half the rate of the increase.
ignore_nice_load: this parameter takes a value of '0' or '1', when set
to '0' (its default) then all processes are counted towards towards the
'cpu utilisation' value. When set to '1' then processes that are
run with a 'nice' value will not count (and thus be ignored) in the
overal usage calculation. This is useful if you are running a CPU
intensive calculation on your laptop that you do not care how long it
takes to complete as you can 'nice' it and prevent it from taking part
in the deciding process of whether to increase your CPU frequency.
2.5 Conservative
----------------
The CPUfreq governor "conservative", much like the "ondemand"
governor, sets the CPU depending on the current usage. It differs in
behaviour in that it gracefully increases and decreases the CPU speed
rather than jumping to max speed the moment there is any load on the
CPU. This behaviour more suitable in a battery powered environment.
The governor is tweaked in the same manner as the "ondemand" governor
through sysfs with the addition of:
freq_step: this describes what percentage steps the cpu freq should be
increased and decreased smoothly by. By default the cpu frequency will
increase in 5% chunks of your maximum cpu frequency. You can change this
value to anywhere between 0 and 100 where '0' will effectively lock your
CPU at a speed regardless of its load whilst '100' will, in theory, make
it behave identically to the "ondemand" governor.
down_threshold: same as the 'up_threshold' found for the "ondemand"
governor but for the opposite direction. For example when set to its
default value of '20' it means that if the CPU usage needs to be below
20% between samples to have the frequency decreased.
3. The Governor Interface in the CPUfreq Core
=============================================
......
......@@ -177,9 +177,10 @@ static unsigned int nforce2_fsb_read(int bootfsb)
*/
static int nforce2_set_fsb(unsigned int fsb)
{
u32 pll, temp = 0;
u32 temp = 0;
unsigned int tfsb;
int diff;
int pll = 0;
if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
......
......@@ -45,7 +45,7 @@
#define PFX "powernow-k8: "
#define BFX PFX "BIOS error: "
#define VERSION "version 1.50.4"
#define VERSION "version 1.60.0"
#include "powernow-k8.h"
/* serialize freq changes */
......@@ -216,10 +216,10 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
do {
wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
if (i++ > 100) {
printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
return 1;
}
if (i++ > 100) {
printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
return 1;
}
} while (query_current_values_with_pending_wait(data));
if (savefid != data->currfid) {
......@@ -336,7 +336,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
/* Phase 2 - core frequency transition */
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
{
u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid;
u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
......@@ -359,9 +359,11 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
: vcoreqfid - vcocurrfid;
while (vcofiddiff > 2) {
(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
if (reqfid > data->currfid) {
if (data->currfid > LO_FID_TABLE_TOP) {
if (write_new_fid(data, data->currfid + 2)) {
if (write_new_fid(data, data->currfid + fid_interval)) {
return 1;
}
} else {
......@@ -371,7 +373,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
}
}
} else {
if (write_new_fid(data, data->currfid - 2))
if (write_new_fid(data, data->currfid - fid_interval))
return 1;
}
......@@ -464,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu)
set_cpus_allowed(current, cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
goto out;
}
......@@ -474,7 +476,7 @@ static int check_supported_cpu(unsigned int cpu)
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) {
((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
goto out;
}
......@@ -517,22 +519,24 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
return -ENODEV;
}
if ((pst[j].fid > MAX_FID)
|| (pst[j].fid & 1)
|| (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) {
if (pst[j].fid > MAX_FID) {
printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j);
return -ENODEV;
}
if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
/* Only first fid is allowed to be in "low" range */
printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
return -EINVAL;
}
if (pst[j].fid < lastfid)
lastfid = pst[j].fid;
}
if (lastfid & 1) {
printk(KERN_ERR PFX "lastfid invalid\n");
printk(KERN_ERR BFX "lastfid invalid\n");
return -EINVAL;
}
if (lastfid > LO_FID_TABLE_TOP)
printk(KERN_INFO PFX "first fid not from lo freq table\n");
printk(KERN_INFO BFX "first fid not from lo freq table\n");
return 0;
}
......@@ -631,7 +635,7 @@ static int find_psb_table(struct powernow_k8_data *data)
dprintk("table vers: 0x%x\n", psb->tableversion);
if (psb->tableversion != PSB_VERSION_1_4) {
printk(KERN_INFO BFX "PSB table is not v1.4\n");
printk(KERN_ERR BFX "PSB table is not v1.4\n");
return -ENODEV;
}
......@@ -689,7 +693,7 @@ static int find_psb_table(struct powernow_k8_data *data)
* BIOS and Kernel Developer's Guide, which is available on
* www.amd.com
*/
printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n");
printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
return -ENODEV;
}
......@@ -912,7 +916,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
goto err_out;
}
......@@ -982,6 +986,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
cpumask_t oldmask = CPU_MASK_ALL;
int rc, i;
if (!cpu_online(pol->cpu))
return -ENODEV;
if (!check_supported_cpu(pol->cpu))
return -ENODEV;
......@@ -1021,7 +1028,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
goto err_out;
}
......@@ -1162,10 +1169,9 @@ static void __exit powernowk8_exit(void)
cpufreq_unregister_driver(&cpufreq_amd64_driver);
}
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com.");
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
MODULE_LICENSE("GPL");
late_initcall(powernowk8_init);
module_exit(powernowk8_exit);
......@@ -42,7 +42,7 @@ struct powernow_k8_data {
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
#define CPUID_XMOD_REV_F 0x00040000
#define CPUID_XMOD_REV_G 0x00060000
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
......@@ -86,13 +86,14 @@ struct powernow_k8_data {
* low fid table
* - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
* in the low fid table
* - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid
* - the parts can only step at <= 200 MHz intervals, odd fid values are
* supported in revision G and later revisions.
* - lowest frequency must be >= interprocessor hypertransport link speed
* (only applies to MP systems obviously)
*/
/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */
#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */
#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
......@@ -106,7 +107,7 @@ struct powernow_k8_data {
#define MIN_FREQ 800 /* Min and max freqs, per spec */
#define MAX_FREQ 5000
#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */
#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
#define VID_OFF 0x3f
......
......@@ -40,6 +40,7 @@ static struct pci_dev *speedstep_chipset_dev;
*/
static unsigned int speedstep_processor = 0;
static u32 pmbase;
/*
* There are only two frequency states for each processor. Values
......@@ -56,34 +57,47 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
/**
* speedstep_set_state - set the SpeedStep state
* @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
* speedstep_find_register - read the PMBASE address
*
* Tries to change the SpeedStep state.
* Returns: -ENODEV if no register could be found
*/
static void speedstep_set_state (unsigned int state)
static int speedstep_find_register (void)
{
u32 pmbase;
u8 pm2_blk;
u8 value;
unsigned long flags;
if (!speedstep_chipset_dev || (state > 0x1))
return;
if (!speedstep_chipset_dev)
return -ENODEV;
/* get PMBASE */
pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
if (!(pmbase & 0x01)) {
printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
return;
return -ENODEV;
}
pmbase &= 0xFFFFFFFE;
if (!pmbase) {
printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
return;
return -ENODEV;
}
dprintk("pmbase is 0x%x\n", pmbase);
return 0;
}
/**
* speedstep_set_state - set the SpeedStep state
* @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
*
* Tries to change the SpeedStep state.
*/
static void speedstep_set_state (unsigned int state)
{
u8 pm2_blk;
u8 value;
unsigned long flags;
if (state > 0x1)
return;
/* Disable IRQs */
local_irq_save(flags);
......@@ -315,10 +329,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
cpus_allowed = current->cpus_allowed;
set_cpus_allowed(current, policy->cpus);
/* detect low and high frequency */
/* detect low and high frequency and transition latency */
result = speedstep_get_freqs(speedstep_processor,
&speedstep_freqs[SPEEDSTEP_LOW].frequency,
&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
&policy->cpuinfo.transition_latency,
&speedstep_set_state);
set_cpus_allowed(current, cpus_allowed);
if (result)
......@@ -335,7 +350,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
/* cpuinfo and default policy values */
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
policy->cur = speed;
result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
......@@ -400,6 +414,9 @@ static int __init speedstep_init(void)
return -EINVAL;
}
if (speedstep_find_register())
return -ENODEV;
return cpufreq_register_driver(&speedstep_driver);
}
......
......@@ -320,11 +320,13 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor);
unsigned int speedstep_get_freqs(unsigned int processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
void (*set_state) (unsigned int state))
{
unsigned int prev_speed;
unsigned int ret = 0;
unsigned long flags;
struct timeval tv1, tv2;
if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
return -EINVAL;
......@@ -337,7 +339,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
return -EIO;
dprintk("previous speed is %u\n", prev_speed);
local_irq_save(flags);
/* switch to low state */
......@@ -350,8 +352,17 @@ unsigned int speedstep_get_freqs(unsigned int processor,
dprintk("low speed is %u\n", *low_speed);
/* start latency measurement */
if (transition_latency)
do_gettimeofday(&tv1);
/* switch to high state */
set_state(SPEEDSTEP_HIGH);
/* end latency measurement */
if (transition_latency)
do_gettimeofday(&tv2);
*high_speed = speedstep_get_processor_frequency(processor);
if (!*high_speed) {
ret = -EIO;
......@@ -369,6 +380,25 @@ unsigned int speedstep_get_freqs(unsigned int processor,
if (*high_speed != prev_speed)
set_state(SPEEDSTEP_LOW);
if (transition_latency) {
*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
tv2.tv_usec - tv1.tv_usec;
dprintk("transition latency is %u uSec\n", *transition_latency);
/* convert uSec to nSec and add 20% for safety reasons */
*transition_latency *= 1200;
/* check if the latency measurement is too high or too low
* and set it to a safe value (500uSec) in that case
*/
if (*transition_latency > 10000000 || *transition_latency < 50000) {
printk (KERN_WARNING "speedstep: frequency transition measured seems out of "
"range (%u nSec), falling back to a safe one of %u nSec.\n",
*transition_latency, 500000);
*transition_latency = 500000;
}
}
out:
local_irq_restore(flags);
return (ret);
......
......@@ -44,4 +44,5 @@ extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
extern unsigned int speedstep_get_freqs(unsigned int processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
void (*set_state) (unsigned int state));
......@@ -269,6 +269,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
result = speedstep_get_freqs(speedstep_processor,
&speedstep_freqs[SPEEDSTEP_LOW].frequency,
&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
NULL,
&speedstep_set_state);
if (result) {
......
......@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <asm/semaphore.h>
#include <linux/seq_file.h>
#include <linux/cpufreq.h>
/*
* Get CPU information for use by the procfs.
......@@ -86,8 +87,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "stepping\t: unknown\n");
if ( cpu_has(c, X86_FEATURE_TSC) ) {
unsigned int freq = cpufreq_quick_get(n);
if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
cpu_khz / 1000, (cpu_khz % 1000));
freq / 1000, (freq % 1000));
}
/* Cache size */
......
......@@ -43,6 +43,7 @@
#include <linux/initrd.h>
#include <linux/platform.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
......@@ -517,6 +518,7 @@ show_cpuinfo (struct seq_file *m, void *v)
char family[32], features[128], *cp, sep;
struct cpuinfo_ia64 *c = v;
unsigned long mask;
unsigned int proc_freq;
int i;
mask = c->features;
......@@ -549,6 +551,10 @@ show_cpuinfo (struct seq_file *m, void *v)
sprintf(cp, " 0x%lx", mask);
}
proc_freq = cpufreq_quick_get(cpunum);
if (!proc_freq)
proc_freq = c->proc_freq / 1000;
seq_printf(m,
"processor : %d\n"
"vendor : %s\n"
......@@ -565,7 +571,7 @@ show_cpuinfo (struct seq_file *m, void *v)
"BogoMIPS : %lu.%02lu\n",
cpunum, c->vendor, family, c->model, c->revision, c->archrev,
features, c->ppn, c->number,
c->proc_freq / 1000000, c->proc_freq % 1000000,
proc_freq / 1000, proc_freq % 1000,
c->itc_freq / 1000000, c->itc_freq % 1000000,
lpj*HZ/500000, (lpj*HZ/5000) % 100);
#ifdef CONFIG_SMP
......
......@@ -42,6 +42,7 @@
#include <linux/edd.h>
#include <linux/mmzone.h>
#include <linux/kexec.h>
#include <linux/cpufreq.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
......@@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "stepping\t: unknown\n");
if (cpu_has(c,X86_FEATURE_TSC)) {
unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
cpu_khz / 1000, (cpu_khz % 1000));
freq / 1000, (freq % 1000));
}
/* Cache size */
......
......@@ -822,6 +822,30 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne
}
/**
* cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur
* @cpu: CPU number
*
* This is the last known freq, without actually getting it from the driver.
* Return value will be same as what is shown in scaling_cur_freq in sysfs.
*/
unsigned int cpufreq_quick_get(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
unsigned int ret = 0;
if (policy) {
down(&policy->lock);
ret = policy->cur;
up(&policy->lock);
cpufreq_cpu_put(policy);
}
return (ret);
}
EXPORT_SYMBOL(cpufreq_quick_get);
/**
* cpufreq_get - get the current CPU frequency (in kHz)
* @cpu: CPU number
......
......@@ -93,7 +93,7 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
return kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
( !dbs_tuners_ins.ignore_nice ?
( dbs_tuners_ins.ignore_nice ?
kstat_cpu(cpu).cpustat.nice :
0);
}
......@@ -127,7 +127,7 @@ show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice, ignore_nice);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
......@@ -207,7 +207,7 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused,
return count;
}
static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
unsigned int input;
......@@ -272,7 +272,7 @@ define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice);
define_one_rw(ignore_nice_load);
define_one_rw(freq_step);
static struct attribute * dbs_attributes[] = {
......@@ -282,7 +282,7 @@ static struct attribute * dbs_attributes[] = {
&sampling_down_factor.attr,
&up_threshold.attr,
&down_threshold.attr,
&ignore_nice.attr,
&ignore_nice_load.attr,
&freq_step.attr,
NULL
};
......
......@@ -89,7 +89,7 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
return kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
( !dbs_tuners_ins.ignore_nice ?