author		Morten Rasmussen <Morten.Rasmussen@arm.com>	2012-06-29 16:49:59 +0100
committer	Viresh Kumar <viresh.kumar@linaro.org>		2012-07-25 10:03:58 +0100
commit		e3355d72293e865acad2ca56e4b836c5717c3d3a (patch)
tree		5b5df27bf9834a1116f4a7065d965bf5aa1e9d44
parent		b32ad183db527a60b39e989b5b7e1576d5b80a66 (diff)
sched: load-tracking driven wakeup migration for HMP platforms
Attempts to migrate tasks to an appropriate cpu on heterogeneous
platforms based on the task's individual tracked load at wakeup. The
migration decision is based on task load thresholds and task priority.
Currently only two types of cpus are supported: fast (high-performance)
and slow (power-efficient).

The HMP setup (fast/slow cpuids) is currently hardcoded in the
scheduler. Obviously, this hack needs to be replaced by a generic way
to expose this information to the scheduler. Ideally, this could be
done using device tree and a not-yet-implemented scheduler interface.

Signed-off-by: Morten Rasmussen <Morten.Rasmussen@arm.com>
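To make the wakeup decision concrete before reading the diff, here is a
minimal, self-contained sketch of the logic this patch adds to
kernel/sched/fair.c. It is illustrative only; the function name, the
on_slow_cpu flag, and the plain userspace harness are stand-ins for the
hmp_* helpers and scheduler context in the real code.

#include <stdio.h>

/* Stand-ins for the patch's tunables; load is in the range [0..1023]. */
#define UP_THRESHOLD	512	/* hmp_up_threshold */
#define DOWN_THRESHOLD	256	/* hmp_down_threshold */
#define UP_PRIO		125	/* hmp_up_prio */

/* Simplified restatement of hmp_up_migration()/hmp_down_migration(). */
static const char *hmp_wakeup_decision(int on_slow_cpu, int prio,
				       unsigned int load)
{
	if (on_slow_cpu && prio < UP_PRIO && prio > 100 && load > UP_THRESHOLD)
		return "migrate to a fast cpu";
	if (prio >= UP_PRIO || (!on_slow_cpu && load < DOWN_THRESHOLD))
		return "migrate to a slow cpu";
	return "stay put";
}

int main(void)
{
	/* nice-0 task (prio 120), heavily loaded, waking on a slow cpu */
	printf("%s\n", hmp_wakeup_decision(1, 120, 600));
	/* same task gone nearly idle, waking on a fast cpu */
	printf("%s\n", hmp_wakeup_decision(0, 120, 100));
	return 0;
}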
-rw-r--r--	arch/arm/Kconfig	29
-rw-r--r--	kernel/sched/fair.c	105
2 files changed, 134 insertions, 0 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a91009c61870..adddd220bd9a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1512,6 +1512,35 @@ config SCHED_SMT
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+	bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+	help
+	  Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+	bool "(EXPERIMENTAL) Heterogeneous multiprocessor scheduling"
+	depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+	help
+	  Experimental scheduler optimizations for heterogeneous platforms.
+	  Attempts to introspectively select task affinity to optimize power
+	  and performance. Currently supports two types of CPUs: fast
+	  (high-performance) and slow (power-efficient). There is currently
+	  no support for migration of task groups, hence !SCHED_AUTOGROUP.
+
+config HMP_FAST_CPU_MASK
+	string "HMP scheduler fast CPU mask"
+	depends on SCHED_HMP
+	help
+	  Specifies the cpuids of the fast CPUs in the system as a list
+	  string, e.g. cpus 0 and 1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+	string "HMP scheduler slow CPU mask"
+	depends on SCHED_HMP
+	help
+	  Specifies the cpuids of the slow CPUs in the system as a list
+	  string, e.g. cpus 0 and 1 should be specified as 0-1.
+
config HAVE_ARM_SCU
bool
help
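For illustration only: on a hypothetical platform with two fast cpus
(0 and 1) and three slow cpus (2-4), the options above could end up in
a .config as follows. The cpu numbering is an assumption made for the
example, not something this patch prescribes.

CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE=y
CONFIG_SCHED_HMP=y
CONFIG_HMP_FAST_CPU_MASK="0-1"
CONFIG_HMP_SLOW_CPU_MASK="2-4"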
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 97760898376c..92d806d2a667 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3082,6 +3082,73 @@ done:
return target;
}
+#ifdef CONFIG_SCHED_HMP
+/* Heterogeneous multiprocessor (HMP) optimizations
+ * We need to know which cpus are fast and which are slow. Ideally, this
+ * information would be provided by the platform in some way. For now it
+ * is set in the kernel config. */
+static struct cpumask hmp_fast_cpu_mask;
+static struct cpumask hmp_slow_cpu_mask;
+
+/* Set up the fast and slow cpumasks.
+ * This should eventually be set up from the device tree instead. */
+static int __init hmp_cpu_mask_setup(void)
+{
+	char buf[64];
+
+	cpumask_clear(&hmp_fast_cpu_mask);
+	cpumask_clear(&hmp_slow_cpu_mask);
+
+	if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, &hmp_fast_cpu_mask))
+		WARN(1, "Failed to parse HMP fast cpu mask!\n");
+	if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, &hmp_slow_cpu_mask))
+		WARN(1, "Failed to parse HMP slow cpu mask!\n");
+
+	printk(KERN_DEBUG "Initializing HMP scheduler:\n");
+	cpulist_scnprintf(buf, 64, &hmp_fast_cpu_mask);
+	printk(KERN_DEBUG " fast cpus: %s\n", buf);
+	cpulist_scnprintf(buf, 64, &hmp_slow_cpu_mask);
+	printk(KERN_DEBUG " slow cpus: %s\n", buf);
+
+	return 1;
+}
+early_initcall(hmp_cpu_mask_setup);
+
+/* Migration thresholds should be in the range [0..1023].
+ * hmp_up_threshold: min. load required for migrating tasks to a fast cpu
+ * hmp_down_threshold: max. load allowed for tasks migrating to a slow cpu
+ * hmp_up_prio: a task's prio must be below this value for it to migrate
+ * to a faster cpu */
+unsigned int hmp_up_threshold = 512;
+unsigned int hmp_down_threshold = 256;
+unsigned int hmp_up_prio = 125;
+static unsigned int hmp_up_migration(int cpu, struct sched_entity *se);
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
+
+static unsigned int hmp_cpu_is_fast(int cpu)
+{
+	return cpumask_test_cpu(cpu, &hmp_fast_cpu_mask);
+}
+
+static unsigned int hmp_cpu_is_slow(int cpu)
+{
+	return cpumask_test_cpu(cpu, &hmp_slow_cpu_mask);
+}
+
+/* Select the target cpu for HMP migration to a fast cpu.
+ * Returns target >= nr_cpu_ids if there are no fast cpus in the task's
+ * affinity mask. */
+static inline unsigned int hmp_select_fast_cpu(struct task_struct *tsk)
+{
+	return cpumask_any_and(&hmp_fast_cpu_mask, tsk_cpus_allowed(tsk));
+}
+
+/* Select the target cpu for HMP migration to a slow cpu.
+ * Returns target >= nr_cpu_ids if there are no slow cpus in the task's
+ * affinity mask. */
+static inline unsigned int hmp_select_slow_cpu(struct task_struct *tsk)
+{
+	return cpumask_any_and(&hmp_slow_cpu_mask, tsk_cpus_allowed(tsk));
+}
+#endif /* CONFIG_SCHED_HMP */
+
/*
* sched_balance_self: balance the current task (running on cpu) in domains
* that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -3208,6 +3275,19 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
unlock:
rcu_read_unlock();
+#ifdef CONFIG_SCHED_HMP
+	if (hmp_up_migration(new_cpu, &p->se)) {
+		cpu = hmp_select_fast_cpu(p);
+		if (cpu < nr_cpu_ids)
+			return cpu;
+	}
+	if (hmp_down_migration(new_cpu, &p->se)) {
+		cpu = hmp_select_slow_cpu(p);
+		if (cpu < nr_cpu_ids)
+			return cpu;
+	}
+#endif
+
return new_cpu;
}
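A note on the priority gate used by the migration tests below: for
fair-class tasks the kernel computes prio = 120 + nice, so hmp_up_prio
= 125 corresponds to nice 5. Tasks at nice 5 or above are never
up-migrated and always qualify for down-migration, and the extra
p->prio > 100 test excludes only prio 100 (nice -20). A small
standalone illustration, assuming only that mapping:

#include <stdio.h>

/* Fair-class mapping: prio = DEFAULT_PRIO (120) + nice. */
#define NICE_TO_PRIO(nice)	(120 + (nice))

int main(void)
{
	/* hmp_up_prio = 125: only tasks with prio < 125 (nice < 5) may
	 * be pulled up to a fast cpu; tasks at prio >= 125 are always
	 * down-migration candidates per hmp_down_migration(). */
	printf("nice   0 -> prio %d\n", NICE_TO_PRIO(0));   /* 120: may go fast */
	printf("nice   5 -> prio %d\n", NICE_TO_PRIO(5));   /* 125: slow only */
	printf("nice -20 -> prio %d\n", NICE_TO_PRIO(-20)); /* 100: excluded by prio > 100 */
	return 0;
}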
@@ -5290,6 +5370,31 @@ need_kick:
static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
#endif
+#ifdef CONFIG_SCHED_HMP
+/* Check if a task should migrate to a faster cpu */
+static unsigned int hmp_up_migration(int cpu, struct sched_entity *se)
+{
+	struct task_struct *p = task_of(se);
+
+	if (p->prio < hmp_up_prio && p->prio > 100
+			&& hmp_cpu_is_slow(cpu)
+			&& se->avg.load_avg_ratio > hmp_up_threshold) {
+		return 1;
+	}
+	return 0;
+}
+
+/* Check if a task should migrate to a slower cpu */
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
+{
+	struct task_struct *p = task_of(se);
+
+	if (p->prio >= hmp_up_prio || (hmp_cpu_is_fast(cpu)
+			&& se->avg.load_avg_ratio < hmp_down_threshold)) {
+		return 1;
+	}
+	return 0;
+}
+#endif /* CONFIG_SCHED_HMP */
+
/*
* run_rebalance_domains is triggered when needed from the scheduler tick.
* Also triggered for nohz idle balancing (with nohz_balancing_kick set).