 arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 107 ++++++++++++++++++++++++++-
 drivers/cpufreq/cpufreq.c                   |  20 +++++
 drivers/cpufreq/cpufreq_ondemand.c          |   9 ++-
 include/asm-i386/msr.h                      |   3 +
 include/asm-x86_64/msr.h                    |   3 +
 include/linux/cpufreq.h                     |   3 +
 6 files changed, 143 insertions(+), 2 deletions(-)
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b0c7db85a4..f8a8e46acb7 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -58,10 +58,12 @@ enum {
};
#define INTEL_MSR_RANGE (0xffff)
+#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
struct cpufreq_frequency_table *freq_table;
+ unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
};
@@ -258,6 +260,100 @@ static u32 get_cur_val(cpumask_t mask)
return cmd.val;
}
+/*
+ * Return the measured active (C0) frequency on this CPU since the last
+ * call to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use the IA32_MPERF and IA32_APERF MSRs to measure performance
+ * over a period of time while the CPU is in the C0 state.
+ * IA32_MPERF counts at the rate of the maximum advertised frequency;
+ * IA32_APERF counts at the rate of the actual CPU frequency.
+ * Only the IA32_APERF/IA32_MPERF ratio is architecturally defined;
+ * no meaning should be attached to the absolute values of these MSRs.
+ */
+static unsigned int get_measured_perf(unsigned int cpu)
+{
+ union {
+ struct {
+ u32 lo;
+ u32 hi;
+ } split;
+ u64 whole;
+ } aperf_cur, mperf_cur;
+
+ cpumask_t saved_mask;
+ unsigned int perf_percent;
+ unsigned int retval;
+
+ saved_mask = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ if (get_cpu() != cpu) {
+ /* We were not able to run on the requested processor */
+ put_cpu();
+ return 0;
+ }
+
+ rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+ rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+ wrmsr(MSR_IA32_APERF, 0, 0);
+ wrmsr(MSR_IA32_MPERF, 0, 0);
+
+#ifdef __i386__
+ /*
+ * We don't want to do a 64-bit divide on a 32-bit kernel.
+ * Get an approximate value instead; give up and return zero if we
+ * cannot get one.
+ */
+ if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+ int shift_count;
+ u32 h;
+
+ h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+ shift_count = fls(h);
+
+ aperf_cur.whole >>= shift_count;
+ mperf_cur.whole >>= shift_count;
+ }
+
+ if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+ int shift_count = 7;
+ aperf_cur.split.lo >>= shift_count;
+ mperf_cur.split.lo >>= shift_count;
+ }
+
+ if (aperf_cur.split.lo && mperf_cur.split.lo) {
+ perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+ } else {
+ perf_percent = 0;
+ }
+
+#else
+ if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+ int shift_count = 7;
+ aperf_cur.whole >>= shift_count;
+ mperf_cur.whole >>= shift_count;
+ }
+
+ if (aperf_cur.whole && mperf_cur.whole) {
+ perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+ } else {
+ perf_percent = 0;
+ }
+
+#endif
+
+ retval = drv_data[cpu]->max_freq * perf_percent / 100;
+
+ put_cpu();
+ set_cpus_allowed(current, saved_mask);
+
+ dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+ return retval;
+}
+
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
struct acpi_cpufreq_data *data = drv_data[cpu];
@@ -497,7 +593,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int valid_states = 0;
unsigned int cpu = policy->cpu;
struct acpi_cpufreq_data *data;
- unsigned int l, h;
unsigned int result = 0;
struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
struct acpi_processor_performance *perf;
@@ -591,6 +686,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+ data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
for (i = 0; i < perf->state_count; i++) {
if (i > 0 && perf->states[i].core_frequency ==
@@ -625,6 +721,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ /* Check for APERF/MPERF support in hardware */
+ if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
+ unsigned int ecx;
+ ecx = cpuid_ecx(6);
+ if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+ acpi_cpufreq_driver.getavg = get_measured_perf;
+ }
+ }
+
dprintk("CPU%u - ACPI performance management activated.\n", cpu);
for (i = 0; i < perf->state_count; i++)
dprintk(" %cP%d: %d MHz, %d mW, %d uS\n",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 86e69b7f912..56c433e64d5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1274,6 +1274,26 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
+int cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+ int ret = 0;
+
+ policy = cpufreq_cpu_get(policy->cpu);
+ if (!policy)
+ return -EINVAL;
+
+ mutex_lock(&policy->lock);
+
+ if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+ ret = cpufreq_driver->getavg(policy->cpu);
+
+ mutex_unlock(&policy->lock);
+
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
+
/*
* Locking: Must be called with the lock_cpu_hotplug() lock held
* when "event" is CPUFREQ_GOV_LIMITS
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index bf8aa45d4f0..291cfe9400a 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -393,8 +393,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
* policy. To be safe, we focus 10 points under the threshold.
*/
if (load < (dbs_tuners_ins.up_threshold - 10)) {
- unsigned int freq_next = (policy->cur * load) /
+ unsigned int freq_next, freq_cur;
+
+ freq_cur = cpufreq_driver_getavg(policy);
+ if (!freq_cur)
+ freq_cur = policy->cur;
+
+ freq_next = (freq_cur * load) /
(dbs_tuners_ins.up_threshold - 10);
+
if (!dbs_tuners_ins.powersave_bias) {
__cpufreq_driver_target(policy, freq_next,
CPUFREQ_RELATION_L);
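
A quick worked example of the formula above, with made-up numbers
(up_threshold defaults to 80 in this governor): if the measured average is
2 GHz and the load is 35%, the governor picks the frequency at which the same
busy time would amount to roughly 70% load.

#include <stdio.h>

int main(void)
{
	unsigned int up_threshold = 80;   /* ondemand default */
	unsigned int freq_cur = 2000000;  /* measured average, in kHz */
	unsigned int load = 35;           /* percent busy in last sample */

	/* Busy time stays constant, so at freq_next the load becomes
	 * load * freq_cur / freq_next = up_threshold - 10. */
	unsigned int freq_next = (freq_cur * load) / (up_threshold - 10);

	printf("freq_next = %u kHz\n", freq_next);  /* 1000000 kHz */
	return 0;
}
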
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index 62b76cd9695..0aa15fc8d91 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -125,6 +125,9 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val)
#define MSR_IA32_PERF_STATUS 0x198
#define MSR_IA32_PERF_CTL 0x199
+#define MSR_IA32_MPERF 0xE7
+#define MSR_IA32_APERF 0xE8
+
#define MSR_IA32_THERM_CONTROL 0x19a
#define MSR_IA32_THERM_INTERRUPT 0x19b
#define MSR_IA32_THERM_STATUS 0x19c
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 37e194169fa..e6158228873 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -307,6 +307,9 @@ static inline unsigned int cpuid_edx(unsigned int op)
#define MSR_IA32_PERF_STATUS 0x198
#define MSR_IA32_PERF_CTL 0x199
+#define MSR_IA32_MPERF 0xE7
+#define MSR_IA32_APERF 0xE8
+
#define MSR_IA32_THERM_CONTROL 0x19a
#define MSR_IA32_THERM_INTERRUPT 0x19b
#define MSR_IA32_THERM_STATUS 0x19c
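
As a side note, the same two registers can be sampled from userspace through
the msr character device (modprobe msr, root required). The sketch below is
illustrative and reuses the 0xE7/0xE8 numbers defined above; the device
returns 8 bytes at the file offset equal to the MSR number.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t mperf, aperf;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	/* pread() at offset == MSR number reads that MSR on this CPU. */
	if (pread(fd, &mperf, sizeof(mperf), 0xE7) != sizeof(mperf) ||
	    pread(fd, &aperf, sizeof(aperf), 0xE8) != sizeof(aperf)) {
		perror("pread");
		close(fd);
		return 1;
	}

	printf("MPERF=%llu APERF=%llu\n",
	       (unsigned long long)mperf, (unsigned long long)aperf);
	close(fd);
	return 0;
}
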
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 4ea39fee99c..7f008f6bfdc 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -172,6 +172,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int relation);
+extern int cpufreq_driver_getavg(struct cpufreq_policy *policy);
+
int cpufreq_register_governor(struct cpufreq_governor *governor);
void cpufreq_unregister_governor(struct cpufreq_governor *governor);
@@ -204,6 +206,7 @@ struct cpufreq_driver {
unsigned int (*get) (unsigned int cpu);
/* optional */
+ unsigned int (*getavg) (unsigned int cpu);
int (*exit) (struct cpufreq_policy *policy);
int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg);
int (*resume) (struct cpufreq_policy *policy);
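
For completeness, a sketch (illustrative names, not from the patch) of how a
driver could fill in the new optional hook statically; acpi-cpufreq above
instead assigns it at init time, once CPUID leaf 6 confirms APERF/MPERF
support.

#include <linux/cpufreq.h>

static int my_verify(struct cpufreq_policy *policy) { return 0; }
static int my_target(struct cpufreq_policy *policy,
		     unsigned int target_freq, unsigned int relation) { return 0; }
static unsigned int my_get(unsigned int cpu) { return 0; }

/* Return the measured average frequency in kHz, or 0 on failure. */
static unsigned int my_getavg(unsigned int cpu) { return 0; }

static struct cpufreq_driver my_driver = {
	.name	= "example",
	.verify	= my_verify,
	.target	= my_target,
	.get	= my_get,
	.getavg	= my_getavg,	/* optional; reached via cpufreq_driver_getavg() */
};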