aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Shi <alex.shi@intel.com>2012-11-20 19:00:30 +0800
committerViresh Kumar <viresh.kumar@linaro.org>2013-03-12 15:03:11 +0530
commitb8cc2530a62c2168d9c2e445bdf97f6a477c4c06 (patch)
treee4535066deba4451fd1200ecbfd45a1964f560c9
parent92d696fa7394330aa65abed9e23231db8f57d37e (diff)
sched: lazy power balance (power-aware-scheduling-v5)
When the number of active tasks in a sched domain hovers around the power-friendly scheduling criteria, scheduling will thrash between the power-friendly balance and the performance balance, causing unnecessary task migration. The typical benchmark is 'make -j x'.

To remove this issue, introduce a u64 perf_lb_record variable to record the performance load balance history. If there was no performance LB in 32 consecutive load balances, or no LB for 8 times max_interval ms, or at most 4 performance LBs in the last 64 load balances, then we accept a power-friendly LB. Otherwise, we give up this power-friendly LB chance and do nothing.

With this patch, the worst case for power scheduling -- kbuild -- gets a similar performance/power value among the different policies. BTW, the lazy balance shows a performance gain when j is up to 32.

On my SNB EP 2-socket machine with 8 cores * HT, 'make -j x' results:

          powersaving        balance            performance
x = 1     175.603 /417 13    175.220 /416 13    176.073 /407 13
x = 2     192.215 /218 23    194.522 /202 25    217.393 /200 23
x = 4     205.226 /124 39    208.823 /114 42    230.425 /105 41
x = 8     236.369 /71  59    249.005 /65  61    257.661 /62  62
x = 16    283.842 /48  73    307.465 /40  81    309.336 /39  82
x = 32    325.197 /32  96    333.503 /32  93    336.138 /32  92

How to read the data, e.g. "175.603 /417 13":
  175.603: average Watts
  417: seconds (compile time)
  13: scaled performance/power = 1000000 / seconds / watts

Signed-off-by: Alex Shi <alex.shi@intel.com>
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/sched/fair.c68
2 files changed, 57 insertions, 12 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c0dbebe481..52ac745210d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -906,6 +906,7 @@ struct sched_domain {
unsigned long last_balance; /* init to jiffies. units in jiffies */
unsigned int balance_interval; /* initialise to 1. units in ms. */
unsigned int nr_balance_failed; /* initialise to 0 */
+ u64 perf_lb_record; /* performance balance record */
u64 last_update;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 16e1cec20c7..8bfad654508 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4505,6 +4505,60 @@ static inline void update_sd_lb_power_stats(struct lb_env *env,
}
}
+#define PERF_LB_HH_MASK 0xffffffff00000000ULL
+#define PERF_LB_LH_MASK 0xffffffffULL
+
+/**
+ * need_perf_balance - Check whether a performance load balance is needed
+ * in the sched_domain, and record the decision in the
+ * domain's balance history.
+ *
+ * @env: The load balancing environment.
+ * @sds: Variable containing the statistics of the sched_domain
+ *
+ * env->sd->perf_lb_record is used as a 64-entry history, one bit per call:
+ * it is shifted left by one on every call, and bit 0 is set only when this
+ * call is a performance balance (env->perf_lb). PERF_LB_LH_MASK selects the
+ * low 32 bits (the 32 most recent calls, including this one) and
+ * PERF_LB_HH_MASK selects the high 32 bits (the 32 calls before those).
+ *
+ * Side effects when env->perf_lb is not set:
+ * - lazy powersaving balance accepted: env->imbalance is set to
+ * sds->min_load_per_task; env->power_lb and sds->group_min are kept.
+ * - otherwise: env->power_lb is cleared and sds->group_min is set to NULL.
+ *
+ * Return: 1 if a performance balance is needed, 0 otherwise. On 0 the
+ * caller in find_busiest_group() returns sds->group_min, which is NULL when
+ * the powersaving balance was given up.
+ */
+static int need_perf_balance(struct lb_env *env, struct sd_lb_stats *sds)
+{
+ env->sd->perf_lb_record <<= 1;
+
+ if (env->perf_lb) {
+ env->sd->perf_lb_record |= 0x1;
+ return 1;
+ }
+
+ /*
+ * The situation isn't eligible for performance balance. If this_cpu
+ * is not eligible or the timing is not suitable for lazy powersaving
+ * balance, we will stop both powersaving and performance balance.
+ *
+ * this_cpu is eligible only when powersaving balance was requested,
+ * its group (sds->this) is the group leader, and the leader is not
+ * also the group_min.
+ */
+ if (env->power_lb && sds->this == sds->group_leader
+ && sds->group_leader != sds->group_min) {
+ int interval;
+
+ /*
+ * powersaving balance interval set as 8 * max_interval,
+ * converted to jiffies. If more than that has elapsed since
+ * last_balance, the whole history is forgotten, which makes
+ * the eligibility test below pass.
+ *
+ * NOTE(review): interval is a plain int; confirm it matches
+ * the return type of msecs_to_jiffies().
+ */
+ interval = msecs_to_jiffies(8 * env->sd->max_interval);
+ if (time_after(jiffies, env->sd->last_balance + interval))
+ env->sd->perf_lb_record = 0;
+
+ /*
+ * An eligible timing is no performance balance in last 32
+ * balance and performance balance is no more than 4 times
+ * in last 64 balance, or no balance in powersaving interval
+ * time.
+ *
+ * That is: the low 32 history bits are all clear and at most
+ * 4 of the high 32 history bits are set.
+ */
+ if ((hweight64(env->sd->perf_lb_record & PERF_LB_HH_MASK) <= 4)
+ && !(env->sd->perf_lb_record & PERF_LB_LH_MASK)) {
+
+ env->imbalance = sds->min_load_per_task;
+ return 0;
+ }
+
+ }
+
+ /*
+ * give up this time power balancing, do nothing: clearing
+ * group_min leaves the caller with a NULL group to return.
+ */
+ env->power_lb = 0;
+ sds->group_min = NULL;
+ return 0;
+}
+
/**
* get_sd_load_idx - Obtain the load index for a given sched domain.
* @sd: The sched_domain whose load_idx is to be obtained.
@@ -5128,18 +5182,8 @@ find_busiest_group(struct lb_env *env, int *balance)
*/
update_sd_lb_stats(env, balance, &sds);
- if (!env->perf_lb && !env->power_lb)
- return NULL;
-
- if (env->power_lb) {
- if (sds.this == sds.group_leader &&
- sds.group_leader != sds.group_min) {
- env->imbalance = sds.min_load_per_task;
- return sds.group_min;
- }
- env->power_lb = 0;
- return NULL;
- }
+ if (!need_perf_balance(env, &sds))
+ return sds.group_min;
/*
* this_cpu is not the appropriate cpu to perform load balancing at