author		Chris Redpath <chris.redpath@arm.com>	2013-10-11 11:45:04 +0100
committer	Jon Medhurst <tixy@linaro.org>	2013-10-11 15:07:18 +0100
commit		d8063e7015122eb3f6173acf496171def8941734 (patch)
tree		4b0f9d624a18f554f1707de86ea9b5c3c9060a3f
parent		cd5c2cc93d3dc581a19c62442f40895500d2a34c (diff)
HMP: Implement task packing for small tasks in HMP systems
If we wake up a task on a little CPU, fill CPUs rather than spread.
Adds 2 new files to sys/kernel/hmp to control packing behaviour.

packing_enable: task packing enabled (1) or disabled (0)
packing_limit: Runqueues will be filled up to this load ratio.

This functionality is disabled by default on TC2 as it lacks per-cpu
power gating so packing small tasks there doesn't make sense.

Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
-rw-r--r--	arch/arm/Kconfig	12
-rw-r--r--	kernel/sched/fair.c	143
2 files changed, 146 insertions, 9 deletions
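
To illustrate the packing policy described in the commit message (this sketch is not part of the patch), the loop below mirrors the selection logic added in hmp_best_little_cpu(): little CPUs are scanned in logical order and the first one whose tracked load plus the waking task's load stays at or below the packing limit is chosen; if none fits, the caller falls back to the usual spreading path. The CPU loads, task load, and helper names here are hypothetical.

#include <stdio.h>

#define NICE_0_LOAD		1024
/* Default packing limit used by the patch: NICE_0_LOAD * 9/8 = 1152 */
#define HMP_FULL_THRESHOLD	((NICE_0_LOAD * 9) / 8)

/* Hypothetical per-CPU tracked load (rq->avg.load_avg_ratio, 0..1024 scale) */
static unsigned long cpu_load[] = { 900, 400, 1100, 200 };
#define NR_LITTLE_CPUS	(sizeof(cpu_load) / sizeof(cpu_load[0]))

/* Return the first little CPU that still has room for the task, or -1 */
static int pick_packed_cpu(unsigned long task_load)
{
	unsigned int cpu;

	for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++) {
		/* estimate the runqueue load if the task were added here */
		unsigned long estimated = cpu_load[cpu] + task_load;

		if (estimated <= HMP_FULL_THRESHOLD)
			return cpu;	/* first CPU under the limit wins */
	}
	return -1;	/* nothing fits: caller would spread instead */
}

int main(void)
{
	unsigned long task_load = 150;	/* a "small" task, well below NICE_0_LOAD */

	printf("limit=%d, chosen cpu=%d\n",
	       HMP_FULL_THRESHOLD, pick_packed_cpu(task_load));
	return 0;
}

With these numbers CPU 0 is picked (900 + 150 <= 1152) even though CPU 3 is almost idle, which is the fill-before-spread behaviour the patch aims for; the real code additionally skips packing entirely when the waking task itself is larger than about 90% of NICE_0_LOAD.
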
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a5f5b8c385..e79dfda6644 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1578,6 +1578,18 @@ config HMP_FREQUENCY_INVARIANT_SCALE
migration strategy to interact more predictably with CPUFreq's
asynchronous compute capacity changes.
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config HAVE_ARM_SCU
bool
help
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 003de405acd..66b5b30159f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
int (*from_sysfs)(int);
};
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
struct hmp_data_struct {
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -3668,6 +3664,13 @@ static struct sched_entity *hmp_get_lightest_task(
* hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
* hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
* hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanic. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
*/
unsigned int hmp_up_threshold = 700;
unsigned int hmp_down_threshold = 512;
@@ -3677,11 +3680,27 @@ unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
unsigned int hmp_next_up_threshold = 4096;
unsigned int hmp_next_down_threshold = 4096;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve @ around 800Mhz, so
+ we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650; /* 80% of the 800Mhz freq * NICE_0_LOAD */
+#endif
+#endif
+
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
int *min_cpu, struct cpumask *affinity);
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+ return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
+
/* Check if cpu is in fastest hmp_domain */
static inline unsigned int hmp_cpu_is_fastest(int cpu)
{
@@ -3760,7 +3779,49 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
return lowest_cpu;
}
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+ int cpu) {
+ int tmp_cpu;
+ unsigned long estimated_load;
+ struct hmp_domain *hmp;
+ struct sched_avg *avg;
+ struct cpumask allowed_hmp_cpus;
+ if(!hmp_packing_enabled ||
+ tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+ return hmp_select_slower_cpu(tsk, cpu);
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+
+ /* respect affinity */
+ cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+ tsk_cpus_allowed(tsk));
+
+ for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+ avg = &cpu_rq(tmp_cpu)->avg;
+ /* estimate new rq load if we add this task */
+ estimated_load = avg->load_avg_ratio +
+ tsk->se.avg.load_avg_ratio;
+ if (estimated_load <= hmp_full_threshold) {
+ cpu = tmp_cpu;
+ break;
+ }
+ }
+ /* if no match was found, the task uses the initial value */
+ return cpu;
+}
+#endif
static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
{
/* hack - always use clock from first online CPU */
@@ -3884,6 +3945,15 @@ static int hmp_freqinvar_from_sysfs(int value)
return value;
}
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+ if (value < 0)
+ return -1;
+ return value;
+}
+#endif
static void hmp_attr_add(
const char *name,
int *value,
@@ -3936,6 +4006,16 @@ static int hmp_attr_init(void)
NULL,
hmp_freqinvar_from_sysfs);
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ hmp_attr_add("packing_enable",
+ &hmp_packing_enabled,
+ NULL,
+ hmp_freqinvar_from_sysfs);
+ hmp_attr_add("packing_limit",
+ &hmp_full_threshold,
+ NULL,
+ hmp_packing_from_sysfs);
+#endif
hmp_data.attr_group.name = "hmp";
hmp_data.attr_group.attrs = hmp_data.attributes;
ret = sysfs_create_group(kernel_kobj,
@@ -4194,10 +4274,16 @@ unlock:
return new_cpu;
}
if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
new_cpu = hmp_select_slower_cpu(p, prev_cpu);
- hmp_next_down_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
- return new_cpu;
+#endif
+ if (new_cpu != prev_cpu) {
+ hmp_next_down_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
}
/* Make sure that the task stays in its previous hmp domain */
if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6164,16 +6250,49 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+ struct hmp_domain *hmp;
+ /* always allow ilb on non-slowest domain */
+ if (!hmp_cpu_is_slowest(cpu))
+ return 1;
+
+ hmp = hmp_cpu_domain(cpu);
+ for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+ /* only idle balance if a CPU is loaded over threshold */
+ if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+ return 1;
+ }
+ return 0;
+}
+#endif
+
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
#ifdef CONFIG_SCHED_HMP
+ int ilb_needed = 1;
+
/* restrict nohz balancing to occur in the same hmp domain */
ilb = cpumask_first_and(nohz.idle_cpus_mask,
&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (ilb < nr_cpu_ids)
+ ilb_needed = hmp_packing_ilb_needed(ilb);
#endif
+
+ if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+ return ilb;
+#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
+#endif
return nr_cpu_ids;
}
@@ -6539,8 +6658,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
struct task_struct *p = task_of(se);
u64 now;
- if (hmp_cpu_is_slowest(cpu))
+ if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if(hmp_packing_enabled)
+ return 1;
+ else
+#endif
return 0;
+ }
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
/* Filter by task priority */
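
For readers of the nohz changes above, here is a small standalone sketch (again not part of the patch, with hypothetical CPU loads) of the gate that hmp_packing_ilb_needed() adds: when packing is enabled, an idle balance inside the littlest domain is only worth kicking if at least one CPU there is already loaded past the packing limit; faster domains are never gated.

#include <stdbool.h>
#include <stdio.h>

#define HMP_FULL_THRESHOLD	1152	/* NICE_0_LOAD * 9/8, the default limit */

/* Decide whether an idle balance in the littlest domain is worthwhile */
static bool little_ilb_needed(const unsigned long *load, int nr_cpus)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		/* only balance if packing has overfilled some runqueue */
		if (load[cpu] > HMP_FULL_THRESHOLD)
			return true;
	}
	return false;
}

int main(void)
{
	unsigned long overfilled[] = { 1300, 0 };	/* CPU0 is past the limit */
	unsigned long packed_ok[]  = { 900, 400 };	/* everyone under the limit */

	printf("overfilled: ilb needed = %d\n", little_ilb_needed(overfilled, 2));
	printf("packed_ok:  ilb needed = %d\n", little_ilb_needed(packed_ok, 2));
	return 0;
}

This keeps lightly packed little CPUs in nohz idle instead of waking one just to rebalance, which is the point of packing small tasks in the first place.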
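
The two control files the patch adds live under /sys/kernel/hmp (assuming the usual sysfs mount at /sys). A minimal user-space sketch of toggling them follows; only the file names packing_enable and packing_limit come from the patch, everything else is illustrative.

#include <stdio.h>

/* Write a single integer value to one of the hmp sysfs controls */
static int hmp_write(const char *file, int value)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/hmp/%s", file);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	/* enable packing and consider a runqueue full at roughly NICE_0_LOAD */
	if (hmp_write("packing_enable", 1) || hmp_write("packing_limit", 1024))
		perror("hmp sysfs write");
	return 0;
}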