aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/power/energy-model.rst24
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c4
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c4
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c4
-rw-r--r--drivers/cpuidle/cpuidle-riscv-sbi.c4
-rw-r--r--drivers/idle/intel_idle.c133
-rw-r--r--drivers/opp/of.c6
-rw-r--r--drivers/powercap/dtpm_cpu.c2
-rw-r--r--drivers/thermal/cpufreq_cooling.c2
-rw-r--r--drivers/thermal/devfreq_cooling.c8
-rw-r--r--include/linux/energy_model.h35
-rw-r--r--kernel/power/energy_model.c65
12 files changed, 240 insertions, 51 deletions
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 49549aab41b4..feb257b7f350 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
(static + dynamic). These power values might be coming directly from
experiments and measurements.
+Registration of 'artificial' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is an option to provide a custom callback for drivers missing detailed
+knowledge about power value for each performance state. The callback
+.get_cost() is optional and provides the 'cost' values used by the EAS.
+This is useful for platforms that only provide information on relative
+efficiency between CPU types, where one could use the information to
+create an abstract power model. But even an abstract power model can
+sometimes be hard to fit in, given the input power value size restrictions.
+The .get_cost() allows to provide the 'cost' values which reflect the
+efficiency of the CPUs. This would allow to provide EAS information which
+has different relation than what would be forced by the EM internal
+formulas calculating 'cost' values. To register an EM for such platform, the
+driver must set the flag 'milliwatts' to 0, provide .get_power() callback
+and provide .get_cost() callback. The EM framework would handle such platform
+properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such
+platform. Special care should be taken by other frameworks which are using EM
+to test and treat this flag properly.
+
Registration of 'simple' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -181,8 +201,8 @@ EM framework::
-> drivers/cpufreq/foo_cpufreq.c
- 01 static int est_power(unsigned long *mW, unsigned long *KHz,
- 02 struct device *dev)
+ 01 static int est_power(struct device *dev, unsigned long *mW,
+ 02 unsigned long *KHz)
03 {
04 long freq, power;
05
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 0a94c56ddad2..813cccbfe934 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
-mtk_cpufreq_get_cpu_power(unsigned long *mW,
- unsigned long *KHz, struct device *cpu_dev)
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+ unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 919fa6e3f462..6d2a4cf46db7 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}
static int __maybe_unused
-scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
- struct device *cpu_dev)
+scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
+ unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 755bbdfc5b82..3db4fca1172b 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
struct generic_pm_domain *pd;
struct psci_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
if (!pd)
@@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 5c852e671992..1151e5e2ba82 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np)
struct generic_pm_domain *pd;
struct sbi_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
if (!pd)
@@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 47551ab73ca8..b9bb94bd0f67 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -765,6 +765,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
};
/*
+ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1E and disable C1 by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state adl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 220,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 280,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 680,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 200,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 230,
+ .target_residency = 700,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+/*
* On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
* versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
* MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
@@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_adl __initconst = {
+ .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+ .state_table = adl_l_cstates,
+};
+
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
@@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
@@ -1574,6 +1684,25 @@ static void __init skx_idle_state_table_update(void)
}
/**
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
+ */
+static void __init adl_idle_state_table_update(void)
+{
+ /* Check if user prefers C1 over C1E. */
+ if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+ cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+
+ /* Disable C1E by clearing the "C1E promotion" bit. */
+ c1e_promotion = C1E_PROMOTION_DISABLE;
+ return;
+ }
+
+ /* Make sure C1E is enabled by default */
+ c1e_promotion = C1E_PROMOTION_ENABLE;
+}
+
+/**
* spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
*/
static void __init spr_idle_state_table_update(void)
@@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
spr_idle_state_table_update();
break;
+ case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
+ adl_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 440ab5a03df9..485ea980bde7 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
-_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
+_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
-static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
- struct device *dev)
+static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+ unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index bca2f912d349..f5eced0842b3 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;
pd = em_cpu_get(cpu);
- if (!pd)
+ if (!pd || em_is_artificial(pd))
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 0bfb8eebd126..b8151d95a806 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;
- if (!em)
+ if (!em || em_is_artificial(em))
return false;
policy = cpufreq_cdev->policy;
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4310cb342a9f..b04dcbbf721a 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
+ struct em_perf_domain *em;
char *name;
int err, num_opps;
@@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
dfc->devfreq = df;
- dfc->em_pd = em_pd_get(dev);
- if (dfc->em_pd) {
+ em = em_pd_get(dev);
+ if (em && !em_is_artificial(em)) {
+ dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
- dev_dbg(dev, "missing EM for cooling device\n");
+ dev_dbg(dev, "missing proper EM for cooling device\n");
num_opps = dev_pm_opp_get_opp_count(dev);
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 9f3c400bc52d..8419bffb4398 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -67,11 +67,16 @@ struct em_perf_domain {
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
+ *
+ * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
+ * created by platform missing real power information
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
+#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)
#define em_span_cpus(em) (to_cpumask((em)->cpus))
+#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
@@ -96,11 +101,11 @@ struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
* a device
+ * @dev : Device for which we do this operation (can be a CPU)
* @power : Active power at the performance state
* (modified)
* @freq : Frequency at the performance state in kHz
* (modified)
- * @dev : Device for which we do this operation (can be a CPU)
*
* active_power() must find the lowest performance state of 'dev' above
* 'freq' and update 'power' and 'freq' to the matching active power
@@ -112,11 +117,32 @@ struct em_data_callback {
*
* Return 0 on success.
*/
- int (*active_power)(unsigned long *power, unsigned long *freq,
- struct device *dev);
+ int (*active_power)(struct device *dev, unsigned long *power,
+ unsigned long *freq);
+
+ /**
+ * get_cost() - Provide the cost at the given performance state of
+ * a device
+ * @dev : Device for which we do this operation (can be a CPU)
+ * @freq : Frequency at the performance state in kHz
+ * @cost : The cost value for the performance state
+ * (modified)
+ *
+ * In case of CPUs, the cost is the one of a single CPU in the domain.
+ * It is expected to fit in the [0, EM_MAX_POWER] range due to internal
+ * usage in EAS calculation.
+ *
+ * Return 0 on success, or appropriate error value in case of failure.
+ */
+ int (*get_cost)(struct device *dev, unsigned long freq,
+ unsigned long *cost);
};
-#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \
+ { .active_power = _active_power_cb, \
+ .get_cost = _cost_cb }
+#define EM_DATA_CB(_active_power_cb) \
+ EM_ADV_DATA_CB(_active_power_cb, NULL)
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
@@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
#else
struct em_data_callback {};
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 0153b0ca7b23..6c373f2960e7 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
-static int em_debug_units_show(struct seq_file *s, void *unused)
+static int em_debug_flags_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
- char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
- "milliWatts" : "bogoWatts";
- seq_printf(s, "%s\n", units);
+ seq_printf(s, "%#lx\n", pd->flags);
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(em_debug_units);
-
-static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
-{
- struct em_perf_domain *pd = s->private;
- int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
-
- seq_printf(s, "%d\n", enabled);
-
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
static void em_debug_create_pd(struct device *dev)
{
@@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
&em_debug_cpus_fops);
- debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
- debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
- &em_debug_skip_inefficiencies_fops);
+ debugfs_create_file("flags", 0444, d, dev->em_pd,
+ &em_debug_flags_fops);
/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
#endif
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
- int nr_states, struct em_data_callback *cb)
+ int nr_states, struct em_data_callback *cb,
+ unsigned long flags)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
@@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
- ret = cb->active_power(&power, &freq, dev);
+ ret = cb->active_power(dev, &power, &freq);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
@@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = nr_states - 1; i >= 0; i--) {
- unsigned long power_res = em_scale_power(table[i].power);
+ unsigned long power_res, cost;
+
+ if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+ ret = cb->get_cost(dev, table[i].frequency, &cost);
+ if (ret || !cost || cost > EM_MAX_POWER) {
+ dev_err(dev, "EM: invalid cost %lu %d\n",
+ cost, ret);
+ goto free_ps_table;
+ }
+ } else {
+ power_res = em_scale_power(table[i].power);
+ cost = div64_u64(fmax * power_res, table[i].frequency);
+ }
+
+ table[i].cost = cost;
- table[i].cost = div64_u64(fmax * power_res,
- table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
@@ -197,7 +196,8 @@ free_ps_table:
}
static int em_create_pd(struct device *dev, int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus)
+ struct em_data_callback *cb, cpumask_t *cpus,
+ unsigned long flags)
{
struct em_perf_domain *pd;
struct device *cpu_dev;
@@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
return -ENOMEM;
}
- ret = em_create_perf_table(dev, pd, nr_states, cb);
+ ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
if (ret) {
kfree(pd);
return ret;
@@ -259,6 +259,8 @@ static void em_cpufreq_update_efficiencies(struct device *dev)
found++;
}
+ cpufreq_cpu_put(policy);
+
if (!found)
return;
@@ -332,6 +334,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
+ unsigned long flags = 0;
int cpu, ret;
if (!dev || !nr_states || !cb)
@@ -378,12 +381,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
}
}
- ret = em_create_pd(dev, nr_states, cb, cpus);
+ if (milliwatts)
+ flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ else if (cb->get_cost)
+ flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+
+ ret = em_create_pd(dev, nr_states, cb, cpus, flags);
if (ret)
goto unlock;
- if (milliwatts)
- dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ dev->em_pd->flags |= flags;
em_cpufreq_update_efficiencies(dev);