From 645698bcb5450ccb7939b753fa261b8e185b9111 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 13 Dec 2012 13:43:10 +0530 Subject: cpufreq: arm_big_little: add in-kernel switching(IKS) support This patch adds IKS (In Kernel Switcher) support to cpufreq driver. This creates separate freq table for A7-A15 cpu pair. A7 frequency is virtualized and is halved, so that it touches boundaries with A7 frequencies. Based on Earlier Work from Sudeep. Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Viresh Kumar --- drivers/cpufreq/arm_big_little.c | 319 ++++++++++++++++++++++++++++++++++++--- drivers/cpufreq/arm_big_little.h | 14 +- 2 files changed, 311 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 5d7f53fcd6f5..6c359a37a373 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -24,27 +24,136 @@ #include #include #include +#include #include #include #include #include #include +#include #include "arm_big_little.h" -/* Currently we support only two clusters */ -#define MAX_CLUSTERS 2 +#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) +#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; -static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; -static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; +static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0), + ATOMIC_INIT(0)}; -static unsigned int bL_cpufreq_get(unsigned int cpu) +static unsigned int clk_big_min; /* (Big) clock frequencies */ +static unsigned int clk_little_max; /* Maximum clock frequency (Little) */ + +static DEFINE_PER_CPU(unsigned int, physical_cluster); +static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq); + +static struct mutex cluster_lock[MAX_CLUSTERS]; + +static unsigned int find_cluster_maxfreq(int cluster) +{ + int j; + u32 max_freq = 0, cpu_freq; + + for_each_online_cpu(j) { + cpu_freq = per_cpu(cpu_last_req_freq, j); + + if ((cluster == per_cpu(physical_cluster, j)) && + (max_freq < cpu_freq)) + max_freq = cpu_freq; + } + + pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster, + max_freq); + + return max_freq; +} + +static unsigned int clk_get_cpu_rate(unsigned int cpu) +{ + u32 cur_cluster = per_cpu(physical_cluster, cpu); + u32 rate = clk_get_rate(clk[cur_cluster]) / 1000; + + /* For switcher we use virtual A15 clock rates */ + if (is_bL_switching_enabled()) + rate = VIRT_FREQ(cur_cluster, rate); + + pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu, + cur_cluster, rate); + + return rate; +} + +static unsigned int bL_cpufreq_get_rate(unsigned int cpu) { - u32 cur_cluster = cpu_to_cluster(cpu); + pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, cpu)); - return clk_get_rate(clk[cur_cluster]) / 1000; + return per_cpu(cpu_last_req_freq, cpu); +} + +static unsigned int +bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) +{ + u32 new_rate, prev_rate; + int ret; + + mutex_lock(&cluster_lock[new_cluster]); + + prev_rate = per_cpu(cpu_last_req_freq, cpu); + per_cpu(cpu_last_req_freq, cpu) = rate; + per_cpu(physical_cluster, cpu) = new_cluster; + + if (is_bL_switching_enabled()) { + new_rate = find_cluster_maxfreq(new_cluster); + new_rate = ACTUAL_FREQ(new_cluster, new_rate); + } else { + new_rate = rate; + } + + pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n", + __func__, cpu, old_cluster, new_cluster, new_rate); + + ret = clk_set_rate(clk[new_cluster], new_rate * 1000); + if (WARN_ON(ret)) { + pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret, + new_cluster); + per_cpu(cpu_last_req_freq, cpu) = prev_rate; + per_cpu(physical_cluster, cpu) = old_cluster; + + mutex_unlock(&cluster_lock[new_cluster]); + + return ret; + } + + mutex_unlock(&cluster_lock[new_cluster]); + + /* Recalc freq for old cluster when switching clusters */ + if (old_cluster != new_cluster) { + pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n", + __func__, cpu, old_cluster, new_cluster); + + /* Switch cluster */ + bL_switch_request(cpu, new_cluster); + + mutex_lock(&cluster_lock[old_cluster]); + + /* Set freq of old cluster if there are cpus left on it */ + new_rate = find_cluster_maxfreq(old_cluster); + new_rate = ACTUAL_FREQ(old_cluster, new_rate); + + if (new_rate) { + pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n", + __func__, old_cluster, new_rate); + + if (clk_set_rate(clk[old_cluster], new_rate * 1000)) + pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n", + __func__, ret, old_cluster); + } + mutex_unlock(&cluster_lock[old_cluster]); + } + + return 0; } /* Validate policy frequency range */ @@ -60,12 +169,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpufreq_freqs freqs; - u32 cpu = policy->cpu, freq_tab_idx, cur_cluster; + u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster, + actual_cluster; int ret = 0; - cur_cluster = cpu_to_cluster(policy->cpu); + cur_cluster = cpu_to_cluster(cpu); + new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); - freqs.old = bL_cpufreq_get(policy->cpu); + freqs.old = bL_cpufreq_get_rate(cpu); /* Determine valid target frequency using freq_table */ cpufreq_frequency_table_target(policy, freq_table[cur_cluster], @@ -79,13 +190,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; + if (is_bL_switching_enabled()) { + if ((actual_cluster == A15_CLUSTER) && + (freqs.new < clk_big_min)) { + new_cluster = A7_CLUSTER; + } else if ((actual_cluster == A7_CLUSTER) && + (freqs.new > clk_little_max)) { + new_cluster = A15_CLUSTER; + } + } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000); - if (ret) { - pr_err("clk_set_rate failed: %d\n", ret); + ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new); + if (ret) return ret; - } policy->cur = freqs.new; @@ -94,7 +213,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -static void put_cluster_clk_and_freq_table(struct device *cpu_dev) +static inline u32 get_table_count(struct cpufreq_frequency_table *table) +{ + int count; + + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) + ; + + return count; +} + +/* get the minimum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_min(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t min_freq = ~0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency < min_freq) + min_freq = table[i].frequency; + return min_freq; +} + +/* get the maximum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_max(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t max_freq = 0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency > max_freq) + max_freq = table[i].frequency; + return max_freq; +} + +static int merge_cluster_tables(void) +{ + int i, j, k = 0, count = 1; + struct cpufreq_frequency_table *table; + + for (i = 0; i < MAX_CLUSTERS; i++) + count += get_table_count(freq_table[i]); + + table = kzalloc(sizeof(*table) * count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + freq_table[MAX_CLUSTERS] = table; + + /* Add in reverse order to get freqs in increasing order */ + for (i = MAX_CLUSTERS - 1; i >= 0; i--) { + for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END; + j++) { + table[k].frequency = VIRT_FREQ(i, + freq_table[i][j].frequency); + pr_debug("%s: index: %d, freq: %d\n", __func__, k, + table[k].frequency); + k++; + } + } + + table[k].index = k; + table[k].frequency = CPUFREQ_TABLE_END; + + pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k); + + return 0; +} + +static void _put_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); @@ -105,7 +290,32 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev) } } -static int get_cluster_clk_and_freq_table(struct device *cpu_dev) +static void put_cluster_clk_and_freq_table(struct device *cpu_dev) +{ + u32 cluster = cpu_to_cluster(cpu_dev->id); + int i; + + if (cluster < MAX_CLUSTERS) + return _put_cluster_clk_and_freq_table(cpu_dev); + + if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS])) + return; + + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return; + } + + _put_cluster_clk_and_freq_table(cdev); + } + + /* free virtual table */ + kfree(freq_table[MAX_CLUSTERS]); +} + +static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); char name[14] = "cpu-cluster."; @@ -149,6 +359,62 @@ atomic_dec: return ret; } +static int get_cluster_clk_and_freq_table(struct device *cpu_dev) +{ + u32 cluster = cpu_to_cluster(cpu_dev->id); + int i, ret; + + if (cluster < MAX_CLUSTERS) + return _get_cluster_clk_and_freq_table(cpu_dev); + + if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1) + return 0; + + /* + * Get data for all clusters and fill virtual cluster with a merge of + * both + */ + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return -ENODEV; + } + + ret = _get_cluster_clk_and_freq_table(cdev); + if (ret) + goto put_clusters; + } + + ret = merge_cluster_tables(); + if (ret) + goto put_clusters; + + /* Assuming 2 cluster, set clk_big_min and clk_little_max */ + clk_big_min = get_table_min(freq_table[0]); + clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1])); + + pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n", + __func__, cluster, clk_big_min, clk_little_max); + + return 0; + +put_clusters: + while (i--) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return -ENODEV; + } + + _put_cluster_clk_and_freq_table(cdev); + } + + atomic_dec(&cluster_usage[MAX_CLUSTERS]); + + return ret; +} + /* Per-CPU initialization */ static int bL_cpufreq_init(struct cpufreq_policy *policy) { @@ -177,15 +443,23 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu); + if (cur_cluster < MAX_CLUSTERS) { + cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + + per_cpu(physical_cluster, policy->cpu) = cur_cluster; + } else { + /* Assumption: during init, we are always running on A15 */ + per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER; + } + if (arm_bL_ops->get_transition_latency) policy->cpuinfo.transition_latency = arm_bL_ops->get_transition_latency(cpu_dev); else policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = bL_cpufreq_get(policy->cpu); - - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + policy->cur = clk_get_cpu_rate(policy->cpu); + per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur; dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu); return 0; @@ -219,7 +493,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { .flags = CPUFREQ_STICKY, .verify = bL_cpufreq_verify_policy, .target = bL_cpufreq_set_target, - .get = bL_cpufreq_get, + .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, .exit = bL_cpufreq_exit, .have_governor_per_policy = true, @@ -228,7 +502,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) { - int ret; + int ret, i; if (arm_bL_ops) { pr_debug("%s: Already registered: %s, exiting\n", __func__, @@ -243,6 +517,9 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) arm_bL_ops = ops; + for (i = 0; i < MAX_CLUSTERS; i++) + mutex_init(&cluster_lock[i]); + ret = cpufreq_register_driver(&bL_cpufreq_driver); if (ret) { pr_info("%s: Failed registering platform driver: %s, err: %d\n", diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index 79b2ce17884d..aa2c907f982e 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -23,6 +23,17 @@ #include #include +/* Currently we support only two clusters */ +#define A15_CLUSTER 0 +#define A7_CLUSTER 1 +#define MAX_CLUSTERS 2 + +#ifdef CONFIG_BL_SWITCHER +#define is_bL_switching_enabled() true +#else +#define is_bL_switching_enabled() false +#endif + struct cpufreq_arm_bL_ops { char name[CPUFREQ_NAME_LEN]; int (*get_transition_latency)(struct device *cpu_dev); @@ -36,7 +47,8 @@ struct cpufreq_arm_bL_ops { static inline int cpu_to_cluster(int cpu) { - return topology_physical_package_id(cpu); + return is_bL_switching_enabled() ? MAX_CLUSTERS: + topology_physical_package_id(cpu); } int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); -- cgit v1.2.3