From 5558228276d0791637041e1616e1071509741db9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 13 Dec 2012 13:43:10 +0530 Subject: cpufreq: arm_big_little: add in-kernel switching(IKS) support This patch adds IKS (In Kernel Switcher) support to cpufreq driver. This creates separate freq table for A7-A15 cpu pair. A15 frequency is virtualized and is doubled, so that it touches boundaries with A7 frequencies. Based on earlier work from Sudeep. Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Viresh Kumar --- drivers/cpufreq/arm_big_little.c | 262 +++++++++++++++++++++++++++++++++++---- 1 file changed, 241 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index b5601fcd79e..eb771c213a2 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -26,15 +26,34 @@ #include #include #include +#include #include #include "arm_big_little.h" +#ifdef CONFIG_BL_SWITCHER +#define is_bL_switching_enabled() true +#else +#define is_bL_switching_enabled() false +#endif + +#define A15_CLUSTER 0 +#define A7_CLUSTER 1 #define MAX_CLUSTERS 2 +#define ACTUAL_FREQ(cluster, freq) ((cluster == A15_CLUSTER) ? freq >> 1 : freq) +#define VIRT_FREQ(cluster, freq) ((cluster == A15_CLUSTER) ? 
freq << 1 : freq) + static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; -static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; -static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; +static int freq_table_cnt[MAX_CLUSTERS]; +static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; + +static unsigned int clk_big_min; /* (Big) clock frequencies */ +static unsigned int clk_little_max; /* Maximum clock frequency (Little) */ + +static DEFINE_PER_CPU(unsigned int, physical_cluster); +static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq); /* * Functions to get the current status. @@ -43,14 +62,74 @@ static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; */ static int cpu_to_cluster(int cpu) { - return topology_physical_package_id(cpu); + return is_bL_switching_enabled() ? MAX_CLUSTERS: + topology_physical_package_id(cpu); } -static unsigned int bL_cpufreq_get(unsigned int cpu) +static unsigned int find_cluster_maxfreq(int cpu, int cluster, unsigned int new) { - u32 cur_cluster = cpu_to_cluster(cpu); + int j; + + for_each_online_cpu(j) { + unsigned int temp = per_cpu(cpu_last_req_freq, j); + + if (cpu == j) + continue; + if (cluster == per_cpu(physical_cluster, j) && + new < temp) + new = temp; + } + + pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster, new); - return clk_get_rate(clk[cur_cluster]) / 1000; + return new; +} + +static unsigned int clk_get_cpu_rate(unsigned int cpu) +{ + u32 cur_cluster = per_cpu(physical_cluster, cpu); + u32 rate = clk_get_rate(clk[cur_cluster]) / 1000; + + /* For switcher we use virtual A15 clock rates */ + if (is_bL_switching_enabled()) + rate = VIRT_FREQ(cur_cluster, rate); + + pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu, + cur_cluster, rate); + + return rate; +} + +static unsigned int bL_cpufreq_get_rate(unsigned int 
cpu) +{ + pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, cpu)); + + return per_cpu(cpu_last_req_freq, cpu); +} + +static unsigned int bL_cpufreq_set_rate(u32 cpu, u32 cluster, u32 rate) +{ + u32 max_rate, actual_rate; + int ret; + + if (is_bL_switching_enabled()) { + max_rate = find_cluster_maxfreq(cpu, cluster, rate); + actual_rate = ACTUAL_FREQ(cluster, max_rate); + } else { + actual_rate = max_rate = rate; + } + + pr_debug("%s: cpu: %d, cluster: %d, freq: %d\n", __func__, cpu, cluster, + actual_rate); + + ret = clk_set_rate(clk[cluster], actual_rate * 1000); + if (ret) { + pr_err("clk_set_rate failed: %d\n", ret); + return ret; + } + + per_cpu(cpu_last_req_freq, cpu) = rate; + return 0; } /* Validate policy frequency range */ @@ -67,21 +146,22 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpufreq_freqs freqs; - u32 cpu = policy->cpu, freq_tab_idx, cur_cluster; + u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster, + actual_cluster; int ret = 0; /* ASSUMPTION: The cpu can't be hotplugged in this function */ - cur_cluster = cpu_to_cluster(policy->cpu); + cur_cluster = cpu_to_cluster(cpu); + new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); - freqs.old = bL_cpufreq_get(policy->cpu); + freqs.cpu = cpu; + freqs.old = bL_cpufreq_get_rate(cpu); /* Determine valid target frequency using freq_table */ cpufreq_frequency_table_target(policy, freq_table[cur_cluster], target_freq, relation, &freq_tab_idx); freqs.new = freq_table[cur_cluster][freq_tab_idx].frequency; - freqs.cpu = policy->cpu; - pr_debug("%s: cpu: %d, cluster: %d, oldfreq: %d, target freq: %d, new freq: %d\n", __func__, cpu, cur_cluster, freqs.old, target_freq, freqs.new); @@ -89,13 +169,29 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; + if (is_bL_switching_enabled()) { + if ((actual_cluster == A15_CLUSTER) && + (freqs.new < 
clk_big_min)) { + new_cluster = A7_CLUSTER; + } else if ((actual_cluster == A7_CLUSTER) && + (freqs.new > clk_little_max)) { + new_cluster = A15_CLUSTER; + } + } + for_each_cpu(freqs.cpu, policy->cpus) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000); - if (ret) { - pr_err("clk_set_rate failed: %d\n", ret); + ret = bL_cpufreq_set_rate(cpu, new_cluster, freqs.new); + if (ret) return ret; + + if (new_cluster != actual_cluster) { + pr_debug("%s: old cluster: %d, new cluster: %d\n", __func__, + actual_cluster, new_cluster); + + bL_switch_request(cpu, new_cluster); + per_cpu(physical_cluster, cpu) = new_cluster; } policy->cur = freqs.new; @@ -106,6 +202,28 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } +/* get the minimum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_min(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t min_freq = ~0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency < min_freq) + min_freq = table[i].frequency; + return min_freq; +} + +/* get the maximum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_max(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t max_freq = 0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency > max_freq) + max_freq = table[i].frequency; + return max_freq; +} + /* translate the integer array into cpufreq_frequency_table entries */ struct cpufreq_frequency_table * arm_bL_copy_table_from_array(unsigned int *table, int count) @@ -141,7 +259,41 @@ void arm_bL_free_freq_table(u32 cluster) } EXPORT_SYMBOL_GPL(arm_bL_free_freq_table); -static void put_cluster_clk_and_freq_table(u32 cluster) +static int merge_cluster_tables(void) +{ + int i, j, k = 0, count = 1; + struct cpufreq_frequency_table *table; + + for (i = 0; i < MAX_CLUSTERS; i++) + count += freq_table_cnt[i]; + + table = 
kzalloc(sizeof(*table) * count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + freq_table[MAX_CLUSTERS] = table; + + /* Add in reverse order to get freqs in increasing order */ + for (i = MAX_CLUSTERS - 1; i >= 0; i--) { + for (j = 0; j < freq_table_cnt[i]; j++) { + table[k].index = k; + table[k].frequency = VIRT_FREQ(i, + freq_table[i][j].frequency); + pr_debug("%s: index: %d, freq: %d\n", __func__, k, + table[k].frequency); + k++; + } + } + + table[k].index = k; + table[k].frequency = CPUFREQ_TABLE_END; + + pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k); + + return 0; +} + +static void _put_cluster_clk_and_freq_table(u32 cluster) { if (!atomic_dec_return(&cluster_usage[cluster])) { clk_put(clk[cluster]); @@ -152,7 +304,21 @@ static void put_cluster_clk_and_freq_table(u32 cluster) } } -static int get_cluster_clk_and_freq_table(u32 cluster) +static void put_cluster_clk_and_freq_table(u32 cluster) +{ + int i; + + if (cluster < MAX_CLUSTERS) + return _put_cluster_clk_and_freq_table(cluster); + + if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS])) + return; + + for (i = 0; i < MAX_CLUSTERS; i++) + return _put_cluster_clk_and_freq_table(i); +} + +static int _get_cluster_clk_and_freq_table(u32 cluster) { char name[9] = "cluster"; int count; @@ -164,6 +330,8 @@ static int get_cluster_clk_and_freq_table(u32 cluster) if (!freq_table[cluster]) goto atomic_dec; + freq_table_cnt[cluster] = count; + name[7] = cluster + '0'; clk[cluster] = clk_get(NULL, name); if (!IS_ERR_OR_NULL(clk[cluster])) { @@ -181,6 +349,48 @@ atomic_dec: return -ENODATA; } +static int get_cluster_clk_and_freq_table(u32 cluster) +{ + int i, ret; + + if (cluster < MAX_CLUSTERS) + return _get_cluster_clk_and_freq_table(cluster); + + if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1) + return 0; + + /* + * Get data for all clusters and fill virtual cluster with a merge of + * both + */ + for (i = 0; i < MAX_CLUSTERS; i++) { + ret = _get_cluster_clk_and_freq_table(i); + if (ret) + 
goto put_clusters; + } + + ret = merge_cluster_tables(); + if (ret) + goto put_clusters; + + /* Assuming 2 cluster, set clk_big_min and clk_little_max */ + clk_big_min = VIRT_FREQ(0, get_table_min(freq_table[0])); + clk_little_max = get_table_max(freq_table[1]); + + pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n", + __func__, cluster, clk_big_min, clk_little_max); + + return 0; + +put_clusters: + while (i) + _put_cluster_clk_and_freq_table(--i); + + atomic_dec(&cluster_usage[MAX_CLUSTERS]); + + return ret; +} + /* Per-CPU initialization */ static int bL_cpufreq_init(struct cpufreq_policy *policy) { @@ -202,13 +412,23 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu); + if (cur_cluster < MAX_CLUSTERS) { + cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + cpumask_copy(policy->related_cpus, policy->cpus); + + per_cpu(physical_cluster, policy->cpu) = cur_cluster; + } else { + /* Assumption: during init, we are always running on A15 */ + per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER; + } + policy->cpuinfo.transition_latency = 1000000; /* 1 ms assumed */ - policy->cur = bL_cpufreq_get(policy->cpu); + policy->cur = clk_get_cpu_rate(policy->cpu); + per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur; - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); - cpumask_copy(policy->related_cpus, policy->cpus); + pr_info("%s: Initialized, cpu: %d, cluster %d\n", __func__, + policy->cpu, cur_cluster); - pr_info("CPU %d initialized\n", policy->cpu); return 0; } @@ -231,7 +451,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { .flags = CPUFREQ_STICKY, .verify = bL_cpufreq_verify_policy, .target = bL_cpufreq_set_target, - .get = bL_cpufreq_get, + .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, .exit = bL_cpufreq_exit, .attr = bL_cpufreq_attr, -- cgit v1.2.3