author		Viresh Kumar <viresh.kumar@linaro.org>		2013-04-02 21:59:48 +0530
committer	Viresh Kumar <viresh.kumar@linaro.org>		2013-04-02 21:59:48 +0530
commit		e801e36a4d4ffccbcbb63f2ece35d13cad4f280b (patch)
tree		31676a773066bf680e58fa8d035ca88f3b64aafb
parent		089165092916ec98d675e078dc5782da5d29f181 (diff)
parent		63a67f13217cbb76305b79804e73e5c8b8315370 (diff)

Merge branch 'big-LITTLE-MP-upstream-v2' into linux-linaro-MP-upstream-3.9-rc3 (big-LITTLE-MP-upstream-v1)
-rw-r--r--	arch/arm/include/asm/topology.h		 61
-rw-r--r--	arch/arm/kernel/topology.c		  9
-rw-r--r--	arch/ia64/include/asm/topology.h	  1
-rw-r--r--	arch/tile/include/asm/topology.h	  1
-rw-r--r--	include/linux/sched.h			 16
-rw-r--r--	include/linux/topology.h		  4
-rw-r--r--	kernel/sched/core.c			129
-rw-r--r--	kernel/sched/fair.c			312
-rw-r--r--	kernel/sched/features.h			  2
-rw-r--r--	kernel/sched/idle_task.c		  7
-rw-r--r--	kernel/sched/sched.h			 43
-rw-r--r--	linaro/configs/big-LITTLE-MP.conf	  5

12 files changed, 509 insertions(+), 81 deletions(-)
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 58b8b84adcd..d394578f7a8 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -34,6 +34,67 @@ static inline void store_cpu_topology(unsigned int cpuid) { }
 
 #endif
 
+#ifdef CONFIG_SCHED_MC
+/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
+#ifndef SD_MC_INIT
+#define SD_MC_INIT (struct sched_domain) {			\
+	.min_interval		= 1,				\
+	.max_interval		= 4,				\
+	.busy_factor		= 16,				\
+	.imbalance_pct		= 125,				\
+	.cache_nice_tries	= 1,				\
+	.busy_idx		= 2,				\
+	.wake_idx		= 0,				\
+	.forkexec_idx		= 0,				\
+								\
+	.flags			= 1*SD_LOAD_BALANCE		\
+				| 1*SD_BALANCE_NEWIDLE		\
+				| 1*SD_BALANCE_EXEC		\
+				| 1*SD_BALANCE_FORK		\
+				| 0*SD_BALANCE_WAKE		\
+				| 1*SD_WAKE_AFFINE		\
+				| 0*SD_SHARE_CPUPOWER		\
+				| 1*SD_SHARE_PKG_RESOURCES	\
+				| arch_sd_local_flags(SD_SHARE_PKG_RESOURCES)\
+				| 0*SD_SERIALIZE		\
+				,				\
+	.last_balance		= jiffies,			\
+	.balance_interval	= 1,				\
+}
+#endif
+#endif /* CONFIG_SCHED_MC */
+
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) {			\
+	.min_interval		= 1,				\
+	.max_interval		= 4,				\
+	.busy_factor		= 16,				\
+	.imbalance_pct		= 125,				\
+	.cache_nice_tries	= 1,				\
+	.busy_idx		= 2,				\
+	.idle_idx		= 1,				\
+	.newidle_idx		= 0,				\
+	.wake_idx		= 0,				\
+	.forkexec_idx		= 0,				\
+								\
+	.flags			= 1*SD_LOAD_BALANCE		\
+				| 1*SD_BALANCE_NEWIDLE		\
+				| 1*SD_BALANCE_EXEC		\
+				| 1*SD_BALANCE_FORK		\
+				| 0*SD_BALANCE_WAKE		\
+				| 1*SD_WAKE_AFFINE		\
+				| 0*SD_SHARE_CPUPOWER		\
+				| 0*SD_SHARE_PKG_RESOURCES	\
+				| arch_sd_local_flags(0)	\
+				| 0*SD_SERIALIZE		\
+				| 1*SD_PREFER_SIBLING		\
+				,				\
+	.last_balance		= jiffies,			\
+	.balance_interval	= 1,				\
+}
+#endif
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 79282ebcd93..f89a4a2b6ed 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -201,6 +201,15 @@ static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
  */
 struct cputopo_arm cpu_topology[NR_CPUS];
 
+int arch_sd_local_flags(int level)
+{
+	/* Powergate at threading level doesn't make sense */
+	if (level & SD_SHARE_CPUPOWER)
+		return 1*SD_SHARE_POWERDOMAIN;
+
+	return 0*SD_SHARE_POWERDOMAIN;
+}
+
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	return &cpu_topology[cpu].core_sibling;
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index a2496e449b7..6d0b61741c1 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -65,6 +65,7 @@ void build_cpu_to_node_map(void);
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE,	\
+				| arch_sd_local_flags(0)\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index d5e86c9f74f..adc871020d4 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -71,6 +71,7 @@ static inline const struct cpumask *cpumask_of_node(int node)
 				| 0*SD_WAKE_AFFINE	\
 				| 0*SD_SHARE_CPUPOWER	\
 				| 0*SD_SHARE_PKG_RESOURCES \
+				| arch_sd_local_flags(0)	\
 				| 0*SD_SERIALIZE	\
 				,			\
 	.last_balance		= jiffies,		\
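The arch_sd_local_flags() hook introduced above lets an architecture inject SD_SHARE_POWERDOMAIN into each topology level's flags; on ARM only levels that already share CPU power (SMT siblings) are treated as one power domain, so the MC and CPU levels remain powergate-separable. A standalone sketch of how the hook composes with the per-level flags (plain userspace C, not kernel code; the main() harness and printed values are illustrative assumptions):

/* Flag values copied from the patch; the hook mirrors the ARM version. */
#include <stdio.h>

#define SD_SHARE_CPUPOWER	0x0080
#define SD_SHARE_POWERDOMAIN	0x0100
#define SD_SHARE_PKG_RESOURCES	0x0200

/* Powergating below the SMT level makes no sense, so only CPU-power
 * sharing levels report a shared power domain. */
static int arch_sd_local_flags(int level)
{
	if (level & SD_SHARE_CPUPOWER)
		return 1*SD_SHARE_POWERDOMAIN;
	return 0*SD_SHARE_POWERDOMAIN;
}

int main(void)
{
	/* SIBLING passes SD_SHARE_CPUPOWER, MC passes pkg-resource sharing,
	 * the CPU level passes 0, matching the SD_*_INIT callers above. */
	printf("SIBLING: %#x\n",
	       arch_sd_local_flags(SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES));
	printf("MC:      %#x\n", arch_sd_local_flags(SD_SHARE_PKG_RESOURCES));
	printf("CPU:     %#x\n", arch_sd_local_flags(0));
	return 0;
}

Only the SIBLING level prints 0x100 here, which is what later lets update_packing_domain() find the first domain without SD_SHARE_POWERDOMAIN.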
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4afded2be58..43d10ee10cb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -809,6 +809,7 @@ enum cpu_idle_type {
 #define SD_BALANCE_WAKE		0x0010	/* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
 #define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
@@ -959,6 +960,12 @@ struct sched_domain {
 	unsigned long span[0];
 };
 
+struct sched_domain_rq {
+	struct sched_domain *sd;
+	unsigned long flags;
+	struct rcu_head rcu;	/* used during destruction */
+};
+
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 {
 	return to_cpumask(sd->span);
@@ -1034,6 +1041,7 @@ struct sched_domain;
 #else
 #define ENQUEUE_WAKING		0
 #endif
+#define ENQUEUE_NEWTASK		8
 
 #define DEQUEUE_SLEEP		1
 
@@ -1160,13 +1168,7 @@ struct sched_entity {
 	struct cfs_rq		*my_q;
 #endif
 
-/*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
-	/* Per-entity load-tracking */
+#ifdef CONFIG_SMP
 	struct sched_avg	avg;
 #endif
 };
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d3cf0d6e771..3eab2933c8e 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -99,6 +99,8 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 1*SD_SHARE_CPUPOWER			\
 				| 1*SD_SHARE_PKG_RESOURCES		\
+				| arch_sd_local_flags(SD_SHARE_CPUPOWER|\
+						SD_SHARE_PKG_RESOURCES) \
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
 				| arch_sd_sibling_asym_packing()	\
@@ -131,6 +133,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 1*SD_SHARE_PKG_RESOURCES		\
+				| arch_sd_local_flags(SD_SHARE_PKG_RESOURCES)\
 				| 0*SD_SERIALIZE			\
 				,					\
 	.last_balance		= jiffies,				\
@@ -161,6 +164,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_SHARE_PKG_RESOURCES		\
+				| arch_sd_local_flags(0)		\
 				| 0*SD_SERIALIZE			\
 				| 1*SD_PREFER_SIBLING			\
 				,					\
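The new sched_domain_rq wrapper above exists so that the per-rq domain pointer and the NOHZ_IDLE flag describing it are published and reclaimed together behind one RCU-protected object. A minimal standalone sketch of the invariant this buys (simplified stand-in types, userspace C, no real RCU; the rebuild()/publish flow is an illustrative assumption, not the kernel code):

#include <stdio.h>

#define NOHZ_IDLE 0UL

struct sched_domain { struct sched_domain *parent; };

struct sched_domain_rq {
	struct sched_domain *sd;
	unsigned long flags;	/* the NOHZ_IDLE bit lives with the tree */
};

/* A rebuild prepares a fresh object with the flag pre-set, as
 * build_sched_domain_rq() does with set_bit(NOHZ_IDLE, ...)... */
static struct sched_domain_rq *rebuild(struct sched_domain *sd)
{
	static struct sched_domain_rq next;
	next.sd = sd;
	next.flags = 1UL << NOHZ_IDLE;
	return &next;
}

int main(void)
{
	struct sched_domain top = { 0 };
	/* ...and publishes it with a single pointer store; in the kernel
	 * this is rcu_assign_pointer(rq->sd_rq, sd_rq) in cpu_attach_domain().
	 * A reader that loads the pointer once gets a matching pair. */
	struct sched_domain_rq *sd_rq = rebuild(&top);

	printf("NOHZ_IDLE=%lu for tree %p\n",
	       (sd_rq->flags >> NOHZ_IDLE) & 1, (void *)sd_rq->sd);
	return 0;
}

With the old layout (rq->sd plus a separate per-rq nohz_flags word), the flag could describe a domain tree that had already been swapped out.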
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 870c3242855..06933795dc2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1561,14 +1561,11 @@ static void __sched_fork(struct task_struct *p)
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
-/*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+#ifdef CONFIG_SMP
 	p->se.avg.runnable_avg_period = 0;
 	p->se.avg.runnable_avg_sum = 0;
+	p->se.avg.decay_count = 0;
+	p->se.avg.load_avg_contrib = 0;
 #endif
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -1714,7 +1711,7 @@ void wake_up_new_task(struct task_struct *p)
 #endif
 
 	rq = __task_rq_lock(p);
-	activate_task(rq, p, 0);
+	activate_task(rq, p, ENQUEUE_NEWTASK);
 	p->on_rq = 1;
 	trace_sched_wakeup_new(p, true);
 	check_preempt_curr(rq, p, WF_FORK);
@@ -2533,7 +2530,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 void update_idle_cpu_load(struct rq *this_rq)
 {
 	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
-	unsigned long load = this_rq->load.weight;
+	unsigned long load = (unsigned long)this_rq->cfs.runnable_load_avg;
 	unsigned long pending_updates;
 
 	/*
@@ -2583,7 +2580,7 @@ static void update_cpu_load_active(struct rq *this_rq)
 	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
-	__update_cpu_load(this_rq, this_rq->load.weight, 1);
+	__update_cpu_load(this_rq, this_rq->cfs.runnable_load_avg, 1);
 
 	calc_load_account_active(this_rq);
 }
@@ -2689,8 +2686,8 @@ void scheduler_tick(void)
 
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
-	update_cpu_load_active(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	update_cpu_load_active(rq);
 	raw_spin_unlock(&rq->lock);
 
 	perf_event_task_tick();
@@ -5602,6 +5599,15 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
 		destroy_sched_domain(sd, cpu);
 }
 
+static void destroy_sched_domain_rq(struct sched_domain_rq *sd_rq, int cpu)
+{
+	if (!sd_rq)
+		return;
+
+	destroy_sched_domains(sd_rq->sd, cpu);
+	kfree_rcu(sd_rq, rcu);
+}
+
 /*
  * Keep a special pointer to the highest sched_domain that has
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
@@ -5632,10 +5638,23 @@ static void update_top_cache_domain(int cpu)
  * hold the hotplug lock.
  */
 static void
-cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
+cpu_attach_domain(struct sched_domain_rq *sd_rq, struct root_domain *rd,
+		  int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct sched_domain *tmp;
+	struct sched_domain_rq *tmp_rq;
+	struct sched_domain *tmp, *sd = NULL;
+
+	/*
+	 * If we don't have any sched_domain and associated object, we can
+	 * directly jump to the attach sequence otherwise we try to degenerate
+	 * the sched_domain
+	 */
+	if (!sd_rq)
+		goto attach;
+
+	/* Get a pointer to the 1st sched_domain */
+	sd = sd_rq->sd;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
 	for (tmp = sd; tmp; ) {
@@ -5658,15 +5677,19 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 			destroy_sched_domain(tmp, cpu);
 			if (sd)
 				sd->child = NULL;
+			/* update sched_domain_rq */
+			sd_rq->sd = sd;
 		}
 	}
 
+attach:
 	sched_domain_debug(sd, cpu);
 
 	rq_attach_root(rq, rd);
-	tmp = rq->sd;
-	rcu_assign_pointer(rq->sd, sd);
-	destroy_sched_domains(tmp, cpu);
+	tmp_rq = rq->sd_rq;
+	rcu_assign_pointer(rq->sd_rq, sd_rq);
+	destroy_sched_domain_rq(tmp_rq, cpu);
 
+	update_packing_domain(cpu);
 	update_top_cache_domain(cpu);
 }
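Several hunks above switch the CPU-load bookkeeping from the instantaneous rq->load.weight to the tracked cfs.runnable_load_avg. The difference matters for mostly-sleeping tasks: the instantaneous view counts full weight whenever a task happens to be queued, while the tracked view scales weight by the fraction of time it is actually runnable. An illustrative computation (standalone C; the division form is a simplification, the kernel uses a decaying geometric series):

#include <stdio.h>

#define NICE_0_LOAD 1024

int main(void)
{
	/* Two nice-0 tasks that are runnable only ~25% of the time */
	unsigned long weight = NICE_0_LOAD;
	unsigned long runnable_avg_sum = 256, runnable_avg_period = 1024;

	unsigned long instantaneous = 2 * weight;	/* old rq->load.weight view */
	unsigned long tracked = 2 * (weight *
			runnable_avg_sum / (runnable_avg_period + 1));

	printf("load.weight view:       %lu\n", instantaneous);	/* 2048 */
	printf("runnable_load_avg view: %lu\n", tracked);	/* ~510 */
	return 0;
}

The same reasoning explains the scheduler_tick() reordering: task_tick() updates the entity averages first, so update_cpu_load_active() samples a fresh runnable_load_avg.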
@@ -5695,12 +5718,14 @@ struct sd_data {
 };
 
 struct s_data {
+	struct sched_domain_rq ** __percpu sd_rq;
 	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
 };
 
 enum s_alloc {
 	sa_rootdomain,
+	sa_sd_rq,
 	sa_sd,
 	sa_sd_storage,
 	sa_none,
@@ -5935,7 +5960,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		return;
 
 	update_group_power(sd, cpu);
-	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
+	atomic_set(&sg->sgp->nr_busy_cpus, 0);
 }
 
 int __weak arch_sd_sibling_asym_packing(void)
@@ -5943,6 +5968,11 @@
 {
 	return 0*SD_ASYM_PACKING;
 }
 
+int __weak arch_sd_local_flags(int level)
+{
+	return 1*SD_SHARE_POWERDOMAIN;
+}
+
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -6011,6 +6041,8 @@ static void set_domain_attribute(struct sched_domain *sd,
 
 static void __sdt_free(const struct cpumask *cpu_map);
 static int __sdt_alloc(const struct cpumask *cpu_map);
+static void __sdrq_free(const struct cpumask *cpu_map, struct s_data *d);
+static int __sdrq_alloc(const struct cpumask *cpu_map, struct s_data *d);
 
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 				 const struct cpumask *cpu_map)
@@ -6019,6 +6051,9 @@
 	case sa_rootdomain:
 		if (!atomic_read(&d->rd->refcount))
 			free_rootdomain(&d->rd->rcu); /* fall through */
+	case sa_sd_rq:
+		__sdrq_free(cpu_map, d); /* fall through */
+		free_percpu(d->sd_rq); /* fall through */
 	case sa_sd:
 		free_percpu(d->sd); /* fall through */
 	case sa_sd_storage:
@@ -6038,9 +6073,14 @@
 	d->sd = alloc_percpu(struct sched_domain *);
 	if (!d->sd)
 		return sa_sd_storage;
+	d->sd_rq = alloc_percpu(struct sched_domain_rq *);
+	if (!d->sd_rq)
+		return sa_sd;
+	if (__sdrq_alloc(cpu_map, d))
+		return sa_sd_rq;
 	d->rd = alloc_rootdomain();
 	if (!d->rd)
-		return sa_sd;
+		return sa_sd_rq;
 	return sa_rootdomain;
 }
@@ -6132,6 +6172,7 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					| 0*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
+					| 1*SD_SHARE_POWERDOMAIN
 					| 1*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
 					| sd_local_flags(level)
@@ -6466,6 +6507,46 @@ static void __sdt_free(const struct cpumask *cpu_map)
 	}
 }
 
+static int __sdrq_alloc(const struct cpumask *cpu_map, struct s_data *d)
+{
+	int j;
+
+	for_each_cpu(j, cpu_map) {
+		struct sched_domain_rq *sd_rq;
+
+		sd_rq = kzalloc_node(sizeof(struct sched_domain_rq),
+				GFP_KERNEL, cpu_to_node(j));
+		if (!sd_rq)
+			return -ENOMEM;
+
+		*per_cpu_ptr(d->sd_rq, j) = sd_rq;
+	}
+
+	return 0;
+}
+
+static void __sdrq_free(const struct cpumask *cpu_map, struct s_data *d)
+{
+	int j;
+
+	for_each_cpu(j, cpu_map)
+		if (*per_cpu_ptr(d->sd_rq, j))
+			kfree(*per_cpu_ptr(d->sd_rq, j));
+}
+
+static void build_sched_domain_rq(struct s_data *d, int cpu)
+{
+	struct sched_domain_rq *sd_rq;
+	struct sched_domain *sd;
+
+	/* Attach sched_domain to sched_domain_rq */
+	sd = *per_cpu_ptr(d->sd, cpu);
+	sd_rq = *per_cpu_ptr(d->sd_rq, cpu);
+	sd_rq->sd = sd;
+
+	/* Init flags */
+	set_bit(NOHZ_IDLE, sched_rq_flags(sd_rq));
+}
+
 struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		struct s_data *d, const struct cpumask *cpu_map,
 		struct sched_domain_attr *attr, struct sched_domain *child,
@@ -6495,6 +6576,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 			       struct sched_domain_attr *attr)
 {
 	enum s_alloc alloc_state = sa_none;
+	struct sched_domain_rq *sd_rq;
 	struct sched_domain *sd;
 	struct s_data d;
 	int i, ret = -ENOMEM;
@@ -6547,11 +6629,18 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 		}
 	}
 
+	/* Init objects that must follow the sched_domain lifecycle */
+	for_each_cpu(i, cpu_map) {
+		build_sched_domain_rq(&d, i);
+	}
+
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-		sd = *per_cpu_ptr(d.sd, i);
-		cpu_attach_domain(sd, d.rd, i);
+		sd_rq = *per_cpu_ptr(d.sd_rq, i);
+		cpu_attach_domain(sd_rq, d.rd, i);
+		/* claim allocation of sched_domain_rq object */
+		*per_cpu_ptr(d.sd_rq, i) = NULL;
 	}
 	rcu_read_unlock();
@@ -6982,7 +7071,7 @@ void __init sched_init(void)
 		rq->last_load_update_tick = jiffies;
 
 #ifdef CONFIG_SMP
-		rq->sd = NULL;
+		rq->sd_rq = NULL;
 		rq->rd = NULL;
 		rq->cpu_power = SCHED_POWER_SCALE;
 		rq->post_schedule = 0;
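The allocation changes above extend the existing staged-unwind pattern: __visit_domain_allocation_hell() returns how far setup got, and __free_domain_allocs() falls through its switch from that state so only what was actually allocated is freed; sa_sd_rq slots in between sa_sd and sa_rootdomain. A compressed userspace sketch of the pattern (malloc/free stand in for the kernel allocators; helper names are simplified):

#include <stdlib.h>

enum s_alloc { sa_rootdomain, sa_sd_rq, sa_sd, sa_sd_storage, sa_none };

struct s_data { void *storage, *sd, *sd_rq, *rd; };

static void free_domain_allocs(struct s_data *d, enum s_alloc what)
{
	switch (what) {
	case sa_rootdomain:
		free(d->rd);		/* fall through */
	case sa_sd_rq:
		free(d->sd_rq);		/* fall through */
	case sa_sd:
		free(d->sd);		/* fall through */
	case sa_sd_storage:
		free(d->storage);	/* fall through */
	case sa_none:
		break;
	}
}

static enum s_alloc visit_allocation_hell(struct s_data *d)
{
	if (!(d->storage = malloc(64)))
		return sa_none;
	if (!(d->sd = malloc(64)))
		return sa_sd_storage;
	if (!(d->sd_rq = malloc(64)))	/* the step this merge adds */
		return sa_sd;
	if (!(d->rd = malloc(64)))
		return sa_sd_rq;
	return sa_rootdomain;
}

int main(void)
{
	struct s_data d = { 0 };
	enum s_alloc state = visit_allocation_hell(&d);

	if (state != sa_rootdomain)	/* partial failure: unwind exactly */
		free_domain_allocs(&d, state);
	return 0;
}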
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a33e5986fc..d66fa87ebdb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -160,6 +160,76 @@ void sched_init_granularity(void)
 	update_sysctl();
 }
 
+
+#ifdef CONFIG_SMP
+/*
+ * Save the id of the optimal CPU that should be used to pack small tasks
+ * The value -1 is used when no buddy has been found
+ */
+DEFINE_PER_CPU(int, sd_pack_buddy);
+
+/*
+ * Look for the best buddy CPU that can be used to pack small tasks
+ * We make the assumption that it doesn't wort to pack on CPU that share the
+ * same powerline. We look for the 1st sched_domain without the
+ * SD_SHARE_POWERDOMAIN flag. Then we look for the sched_group with the lowest
+ * power per core based on the assumption that their power efficiency is
+ * better
+ */
+void update_packing_domain(int cpu)
+{
+	struct sched_domain *sd;
+	int id = -1;
+
+	sd = highest_flag_domain(cpu, SD_SHARE_POWERDOMAIN);
+	if (!sd)
+		sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+	else
+		sd = sd->parent;
+
+	while (sd && (sd->flags & SD_LOAD_BALANCE)
+		&& !(sd->flags & SD_SHARE_POWERDOMAIN)) {
+		struct sched_group *sg = sd->groups;
+		struct sched_group *pack = sg;
+		struct sched_group *tmp;
+
+		/*
+		 * The sched_domain of a CPU points on the local sched_group
+		 * and the 1st CPU of this local group is a good candidate
+		 */
+		id = cpumask_first(sched_group_cpus(pack));
+
+		/* loop the sched groups to find the best one */
+		for (tmp = sg->next; tmp != sg; tmp = tmp->next) {
+			if (tmp->sgp->power * pack->group_weight >
+					pack->sgp->power * tmp->group_weight)
+				continue;
+
+			if ((tmp->sgp->power * pack->group_weight ==
+					pack->sgp->power * tmp->group_weight)
+			 && (cpumask_first(sched_group_cpus(tmp)) >= id))
+				continue;
+
+			/* we have found a better group */
+			pack = tmp;
+
+			/* Take the 1st CPU of the new group */
+			id = cpumask_first(sched_group_cpus(pack));
+		}
+
+		/* Look for another CPU than itself */
+		if (id != cpu)
+			break;
+
+		sd = sd->parent;
+	}
+
+	pr_debug("CPU%d packing on CPU%d\n", cpu, id);
+	per_cpu(sd_pack_buddy, cpu) = id;
+}
+
+#endif /* CONFIG_SMP */
+
 #if BITS_PER_LONG == 32
 # define WMULT_CONST	(~0UL)
 #else
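The group loop in update_packing_domain() above selects the sched_group with the lowest power per core. It compares tmp->sgp->power / tmp->group_weight against the current best by cross-multiplication, which avoids integer division (ties break toward the lowest first-CPU id). A standalone sketch of that comparison with illustrative big.LITTLE numbers (the power values are assumptions, not measurements):

#include <stdio.h>

struct group { unsigned long power; unsigned int weight; };

int main(void)
{
	struct group big    = { .power = 2048, .weight = 2 };	/* e.g. A15 pair */
	struct group little = { .power =  614, .weight = 2 };	/* e.g. A7 pair  */

	/* little.power/little.weight < big.power/big.weight, tested without
	 * division exactly as the kernel loop does */
	if (little.power * big.weight < big.power * little.weight)
		printf("pack small tasks on the little cluster\n");
	return 0;
}

The walk then repeats on the parent domain whenever the chosen buddy is the CPU itself, so a CPU never ends up packing onto itself when an alternative exists.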
@@ -1109,8 +1179,7 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */
-#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+#ifdef CONFIG_SMP
 /*
  * We choose a half-life close to 1 scheduling period.
  * Note: The tables below are dependent on this value.
@@ -1503,8 +1572,9 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 /* Add the load generated by se into cfs_rq's child load-average */
 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 						  struct sched_entity *se,
-						  int wakeup)
+						  int flags)
 {
+	int wakeup = flags & ENQUEUE_WAKEUP;
 	/*
 	 * We track migrations using entity decay_count <= 0, on a wake-up
 	 * migration we use a negative decay count to track the remote decays
@@ -1538,6 +1608,12 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 		update_entity_load_avg(se, 0);
 	}
 
+	/*
+	 * set the initial load avg of new task same as its load
+	 * in order to avoid brust fork make few cpu too heavier
+	 */
+	if (flags & ENQUEUE_NEWTASK)
+		se->avg.load_avg_contrib = se->load.weight;
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
 	/* we force update consideration on load-balancer moves */
 	update_cfs_rq_blocked_load(cfs_rq, !wakeup);
@@ -1562,6 +1638,16 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
 	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
 }
+
+/*
+ * Update the rq's load with the elapsed idle time before a task is
+ * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
+ * be the only way to update the runnable statistic.
+ */
+void idle_exit(int this_cpu, struct rq *this_rq)
+{
+	update_rq_runnable_avg(this_rq, 0);
+}
 #else
 static inline void update_entity_load_avg(struct sched_entity *se,
 					  int update_cfs_rq) {}
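The ENQUEUE_NEWTASK path above fixes a fork-burst blind spot: a freshly forked entity has no runnable history, so its tracked contribution starts near zero and many new tasks could pile onto one CPU before the averages catch up; seeding load_avg_contrib with the full weight makes newcomers visible to the balancer immediately. Illustrative arithmetic only (the division-by-(period+1) form mirrors the scaling used elsewhere in this merge, not an exact kernel formula):

#include <stdio.h>

int main(void)
{
	unsigned long weight = 1024;		/* nice-0 task weight */
	unsigned long sum = 0, period = 0;	/* a new task has no history */

	unsigned long tracked = weight * sum / (period + 1);	/* 0 */
	unsigned long seeded = weight;	/* se->avg.load_avg_contrib = se->load.weight */

	printf("tracked contrib: %lu, seeded contrib: %lu\n", tracked, seeded);
	return 0;
}

The seed then decays naturally toward the task's true behaviour as real history accumulates.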
@@ -1699,7 +1785,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
+	enqueue_entity_load_avg(cfs_rq, se, flags);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
@@ -2897,7 +2983,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 /* Used instead of source_load when we know the type == 0 */
 static unsigned long weighted_cpuload(const int cpu)
 {
-	return cpu_rq(cpu)->load.weight;
+	return (unsigned long)cpu_rq(cpu)->cfs.runnable_load_avg;
 }
 
 /*
@@ -2944,7 +3030,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
 
 	if (nr_running)
-		return rq->load.weight / nr_running;
+		return (unsigned long)rq->cfs.runnable_load_avg / nr_running;
 
 	return 0;
 }
@@ -2973,7 +3059,8 @@ static void task_waking_fair(struct task_struct *p)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
- * effective_load() calculates the load change as seen from the root_task_group
+ * effective_load() calculates the runnable load average change as seen from
+ * the root_task_group
  *
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
@@ -3021,6 +3108,9 @@ static void task_waking_fair(struct task_struct *p)
  * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
  * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
  * 4/7) times the weight of the group.
+ *
+ * After get effective_load of the load moving, will engaged the sched entity's
+ * runnable avg.
  */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
@@ -3095,6 +3185,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
+	int runnable_avg;
 
 	idx	  = sd->wake_idx;
 	this_cpu  = smp_processor_id();
@@ -3110,13 +3201,19 @@
 	if (sync) {
 		tg = task_group(current);
 		weight = current->se.load.weight;
+		runnable_avg = current->se.avg.runnable_avg_sum * NICE_0_LOAD
+				/ (current->se.avg.runnable_avg_period + 1);
 
-		this_load += effective_load(tg, this_cpu, -weight, -weight);
-		load += effective_load(tg, prev_cpu, 0, -weight);
+		this_load += effective_load(tg, this_cpu, -weight, -weight)
+				* runnable_avg >> NICE_0_SHIFT;
+		load += effective_load(tg, prev_cpu, 0, -weight)
+				* runnable_avg >> NICE_0_SHIFT;
 	}
 
 	tg = task_group(p);
 	weight = p->se.load.weight;
+	runnable_avg = p->se.avg.runnable_avg_sum * NICE_0_LOAD
+			/ (p->se.avg.runnable_avg_period + 1);
 
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
@@ -3128,16 +3225,18 @@
 	 * task to be woken on this_cpu.
 	 */
 	if (this_load > 0) {
-		s64 this_eff_load, prev_eff_load;
+		s64 this_eff_load, prev_eff_load, tmp_eff_load;
 
 		this_eff_load = 100;
 		this_eff_load *= power_of(prev_cpu);
-		this_eff_load *= this_load +
-			effective_load(tg, this_cpu, weight, weight);
+		tmp_eff_load = effective_load(tg, this_cpu, weight, weight)
+				* runnable_avg >> NICE_0_SHIFT;
+		this_eff_load *= this_load + tmp_eff_load;
 
 		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
 		prev_eff_load *= power_of(this_cpu);
-		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
+		prev_eff_load *= load + (effective_load(tg, prev_cpu, 0, weight)
+				* runnable_avg >> NICE_0_SHIFT);
 
 		balanced = this_eff_load <= prev_eff_load;
 	} else
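wake_affine() now scales each effective_load() delta by the waking task's runnable fraction, expressed against NICE_0_LOAD and shifted back down by NICE_0_SHIFT, so rarely-running tasks perturb the affinity decision proportionally less. A worked example of that scaling (standalone C; the values are illustrative):

#include <stdio.h>

#define NICE_0_LOAD  1024L
#define NICE_0_SHIFT 10

int main(void)
{
	long eff_load = 512;		/* delta returned by effective_load() */
	long sum = 512, period = 1023;	/* task runs ~50% of the time */

	long runnable_avg = sum * NICE_0_LOAD / (period + 1);	/* 512 */
	long scaled = eff_load * runnable_avg >> NICE_0_SHIFT;	/* 256 */

	printf("runnable_avg=%ld scaled=%ld\n", runnable_avg, scaled);
	return 0;
}

A task runnable 100% of the time keeps its full delta; the 50% task above contributes half.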
@@ -3292,6 +3391,50 @@ done:
 	return target;
 }
 
+static bool is_buddy_busy(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u32 sum = rq->avg.runnable_avg_sum;
+	u32 period = rq->avg.runnable_avg_period;
+
+	sum = min(sum, period);
+
+	/*
+	 * A busy buddy is a CPU with a high load or a small load with a lot of
+	 * running tasks.
+	 */
+	return (sum > (period / (rq->nr_running + 2)));
+}
+
+static bool is_light_task(struct task_struct *p)
+{
+	/* A light task runs less than 20% in average */
+	return ((p->se.avg.runnable_avg_sum * 5) <
+			(p->se.avg.runnable_avg_period));
+}
+
+static int check_pack_buddy(int cpu, struct task_struct *p)
+{
+	int buddy = per_cpu(sd_pack_buddy, cpu);
+
+	/* No pack buddy for this CPU */
+	if (buddy == -1)
+		return false;
+
+	/* buddy is not an allowed CPU */
+	if (!cpumask_test_cpu(buddy, tsk_cpus_allowed(p)))
+		return false;
+
+	/*
+	 * If the task is a small one and the buddy is not overloaded,
+	 * we use buddy cpu
+	 */
+	if (!is_light_task(p) || is_buddy_busy(buddy))
+		return false;
+
+	return true;
+}
+
 /*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -3320,6 +3463,10 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
 			want_affine = 1;
 		new_cpu = prev_cpu;
+
+		/* We pack only at wake up and not new task */
+		if (check_pack_buddy(new_cpu, p))
+			return per_cpu(sd_pack_buddy, new_cpu);
 	}
 
 	rcu_read_lock();
@@ -3394,12 +3541,6 @@ unlock:
 }
 
 /*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
  * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
  * cfs_rq_of(p) references at time of call are still valid and identify the
  * previous cpu. However, the caller only guarantees p->pi_lock is held; no
@@ -3422,7 +3563,6 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 		atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
 	}
 }
-#endif
 #endif /* CONFIG_SMP */
 
 static unsigned long
@@ -3970,6 +4110,15 @@
 static unsigned long task_h_load(struct task_struct *p);
 
 static const unsigned int sched_nr_migrate_break = 32;
 
+static unsigned long task_h_load_avg(struct task_struct *p)
+{
+	u32 period = p->se.avg.runnable_avg_period;
+	if (!period)
+		return 0;
+
+	return task_h_load(p) * p->se.avg.runnable_avg_sum / period;
+}
+
 /*
  * move_tasks tries to move up to imbalance weighted load from busiest to
  * this_rq, as part of a balancing operation within domain "sd".
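The two packing gates above are deliberately cheap integer tests: a task is "light" when it runs under 20% of the time (sum * 5 < period), and a buddy counts as "busy" once its rq utilization exceeds 1/(nr_running + 2) of the period, so the bar drops as tasks accumulate on it. A standalone rendition (userspace C; the sample numbers are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool is_light(unsigned int sum, unsigned int period)
{
	return (sum * 5) < period;		/* runnable < 20% on average */
}

static bool is_busy(unsigned int sum, unsigned int period, unsigned int nr_running)
{
	if (sum > period)
		sum = period;
	return sum > period / (nr_running + 2);
}

int main(void)
{
	/* a 15%-runnable task is light; a buddy at 40% utilization with one
	 * task already running is busy (threshold: 1/3 of the period) */
	printf("light: %d\n", is_light(153, 1024));
	printf("busy:  %d\n", is_busy(410, 1024, 1));
	return 0;
}

Packing only triggers when both gates agree, and only on the wakeup path, never for brand-new tasks.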
@@ -4005,12 +4154,13 @@
 		if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
 			goto next;
 
-		load = task_h_load(p);
+		load = task_h_load_avg(p);
 
-		if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
+		if (sched_feat(LB_MIN) && load < 204 && !env->sd->nr_balance_failed)
 			goto next;
 
-		if ((load / 2) > env->imbalance)
+		if ((load / 2) > env->imbalance &&
+			(env->idle != CPU_IDLE && env->idle != CPU_NEWLY_IDLE))
 			goto next;
 
 		if (!can_migrate_task(p, env))
@@ -4415,7 +4565,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
-	unsigned int balance_cpu = -1, first_idle_cpu = 0;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0, overloaded_cpu = 0;
 	unsigned long avg_load_per_task = 0;
 	int i;
 
@@ -4453,6 +4603,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 				max_nr_running = nr_running;
 			if (min_nr_running > nr_running)
 				min_nr_running = nr_running;
+
+			if ((load > rq->cpu_power)
+			 && ((rq->cpu_power*env->sd->imbalance_pct) < (env->dst_rq->cpu_power*100))
+			 && (load > target_load(env->dst_cpu, load_idx)))
+				overloaded_cpu = 1;
 		}
 
 		sgs->group_load += load;
@@ -4498,6 +4653,22 @@
 	    (max_nr_running - min_nr_running) > 1)
 		sgs->group_imb = 1;
 
+	/*
+	 * The load contrib of a CPU exceeds its capacity, we should try to
+	 * find a better CPU with more capacity
+	 */
+	if (overloaded_cpu)
+		sgs->group_imb = 1;
+
+	/*
+	 * When idle balancing pull tasks if more than one task per cpu
+	 * in group
+	 */
+	if (env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) {
+		if (group->group_weight < sgs->sum_nr_running)
+			sgs->group_imb = 1;
+	}
+
 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
@@ -4725,8 +4896,13 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 		min(sds->this_load_per_task, sds->this_load + tmp);
 	pwr_move /= SCHED_POWER_SCALE;
 
-	/* Move if we gain throughput */
-	if (pwr_move > pwr_now)
+	/*
+	 * Move if we gain throughput, or if we have cpus idling while others
+	 * are running more than one task.
+	 */
+	if ((pwr_move > pwr_now) ||
+	    (sds->busiest_group_weight < sds->busiest_nr_running &&
+	     (env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE)))
 		env->imbalance = sds->busiest_load_per_task;
 }
 
@@ -4911,6 +5087,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 			       struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
+	struct rq *overloaded = NULL, *dst_rq = cpu_rq(env->dst_cpu);
 	unsigned long max_load = 0;
 	int i;
 
@@ -4930,6 +5107,17 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		wl = weighted_cpuload(i);
 
 		/*
+		 * If the task requires more power than the current CPU
+		 * capacity and the dst_cpu has more capacity, keep the
+		 * dst_cpu in mind
+		 */
+		if ((rq->nr_running == 1)
+		 && (rq->cfs.runnable_load_avg > rq->cpu_power)
+		 && (rq->cfs.runnable_load_avg > dst_rq->cfs.runnable_load_avg)
+		 && ((rq->cpu_power*env->sd->imbalance_pct) < (dst_rq->cpu_power*100)))
+			overloaded = rq;
+
+		/*
 		 * When comparing with imbalance, use weighted_cpuload()
 		 * which is not scaled with the cpu power.
 		 */
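With move_tasks() now comparing task_h_load_avg() rather than raw task_h_load(), the LB_MIN skip threshold is re-expressed in the averaged scale: 204 is roughly 20% of NICE_0_LOAD (1024), lining up with the 20% "light task" cut used by the packing code. Illustrative computation (standalone C):

#include <stdio.h>

int main(void)
{
	unsigned long h_load = 1024;		/* nice-0 task's hierarchical load */
	unsigned long sum = 150, period = 1024;	/* task is ~15% runnable */

	/* mirrors task_h_load_avg(): load scaled by the runnable fraction */
	unsigned long load_avg = h_load * sum / period;	/* 150 */

	printf("task_h_load_avg=%lu -> %s\n", load_avg,
	       load_avg < 204 ? "skipped by LB_MIN" : "eligible to migrate");
	return 0;
}

That is also why LB_MIN flips to default-on in this merge: with averaged loads the filter now distinguishes genuinely small tasks instead of briefly-idle heavy ones.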
@@ -4950,6 +5138,9 @@
 		}
 	}
 
+	if (!busiest)
+		busiest = overloaded;
+
 	return busiest;
 }
 
@@ -4977,6 +5168,9 @@ static int need_active_balance(struct lb_env *env)
 		return 1;
 	}
 
+	if ((power_of(env->src_cpu)*sd->imbalance_pct) < (power_of(env->dst_cpu)*100))
+		return 1;
+
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -5035,6 +5229,10 @@ redo:
 	ld_moved = 0;
 	lb_iterations = 1;
+
+	env.src_cpu = busiest->cpu;
+	env.src_rq = busiest;
+
 	if (busiest->nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
@@ -5043,8 +5241,6 @@ redo:
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
-		env.src_cpu   = busiest->cpu;
-		env.src_rq    = busiest;
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 		update_h_load(env.src_cpu);
@@ -5345,7 +5541,25 @@ static struct {
 static inline int find_new_ilb(int call_cpu)
 {
+	struct sched_domain *sd;
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
+	int buddy = per_cpu(sd_pack_buddy, call_cpu);
+
+	/*
+	 * If we have a pack buddy CPU, we try to run load balance on a CPU
+	 * that is close to the buddy.
+	 */
+	if (buddy != -1)
+		for_each_domain(buddy, sd) {
+			if (sd->flags & SD_SHARE_CPUPOWER)
+				continue;
+
+			ilb = cpumask_first_and(sched_domain_span(sd),
+					nohz.idle_cpus_mask);
+
+			if (ilb < nr_cpu_ids)
+				break;
+		}
 
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
@@ -5392,31 +5606,39 @@ static inline void nohz_balance_exit_idle(int cpu)
 
 static inline void set_cpu_sd_state_busy(void)
 {
+	struct sched_domain_rq *sd_rq;
 	struct sched_domain *sd;
 	int cpu = smp_processor_id();
 
-	if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
-		return;
-	clear_bit(NOHZ_IDLE, nohz_flags(cpu));
-
 	rcu_read_lock();
-	for_each_domain(cpu, sd)
+	sd_rq = get_sched_domain_rq(cpu);
+
+	if (!sd_rq || !test_bit(NOHZ_IDLE, sched_rq_flags(sd_rq)))
+		goto unlock;
+	clear_bit(NOHZ_IDLE, sched_rq_flags(sd_rq));
+
+	for_each_domain_from_rq(sd_rq, sd)
 		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+unlock:
 	rcu_read_unlock();
 }
 
 void set_cpu_sd_state_idle(void)
 {
+	struct sched_domain_rq *sd_rq;
 	struct sched_domain *sd;
 	int cpu = smp_processor_id();
 
-	if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
-		return;
-	set_bit(NOHZ_IDLE, nohz_flags(cpu));
-
 	rcu_read_lock();
-	for_each_domain(cpu, sd)
+	sd_rq = get_sched_domain_rq(cpu);
+
+	if (!sd_rq || test_bit(NOHZ_IDLE, sched_rq_flags(sd_rq)))
+		goto unlock;
+	set_bit(NOHZ_IDLE, sched_rq_flags(sd_rq));
+
+	for_each_domain_from_rq(sd_rq, sd)
 		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+unlock:
 	rcu_read_unlock();
 }
 
@@ -5621,6 +5843,10 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	if (rq->nr_running >= 2)
 		goto need_kick;
 
+	/* load contrib is higher than cpu capacity */
+	if (rq->cfs.runnable_load_avg > rq->cpu_power)
+		goto need_kick;
+
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		struct sched_group *sg = sd->groups;
@@ -5673,7 +5899,12 @@ static void run_rebalance_domains(struct softirq_action *h)
 
 static inline int on_null_domain(int cpu)
 {
-	return !rcu_dereference_sched(cpu_rq(cpu)->sd);
+	struct sched_domain_rq *sd_rq =
+		rcu_dereference_sched(cpu_rq(cpu)->sd_rq);
+	struct sched_domain *sd = NULL;
+	if (sd_rq)
+		sd = sd_rq->sd;
+	return !sd;
 }
 
 /*
@@ -6114,9 +6345,8 @@ const struct sched_class fair_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_fair,
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	.migrate_task_rq	= migrate_task_rq_fair,
-#endif
+
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
diff --git 
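find_new_ilb() above biases the idle-load-balancer toward the pack buddy: it walks the buddy's domains outward, skipping SD_SHARE_CPUPOWER (SMT) levels, and takes the first idle CPU in the smallest span containing the buddy. A toy rendition with plain arrays standing in for cpumasks and domain spans (the topology below is an assumption for illustration):

#include <stdio.h>

#define NR_CPUS 4

int main(void)
{
	int idle[NR_CPUS] = { 0, 1, 1, 1 };	/* CPUs 1-3 are idle */
	/* spans of the buddy's domains, smallest first: its own cluster
	 * (CPUs 0-1), then the whole system */
	int spans[2][NR_CPUS] = { { 1, 1, 0, 0 }, { 1, 1, 1, 1 } };
	int ilb = -1;

	for (int lvl = 0; lvl < 2 && ilb < 0; lvl++)
		for (int cpu = 0; cpu < NR_CPUS; cpu++)
			if (spans[lvl][cpu] && idle[cpu]) { ilb = cpu; break; }

	printf("ilb CPU: %d\n", ilb);	/* 1: the idle CPU nearest the buddy */
	return 0;
}

The set_cpu_sd_state_busy()/idle() rewrite in the same hunk is the consumer side of sched_domain_rq: the NOHZ_IDLE test and the domain walk now read one RCU snapshot, under a single rcu_read_lock section.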
a/kernel/sched/features.h b/kernel/sched/features.h
index 1ad1d2b5395..4760a2d2da4 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -65,7 +65,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
-SCHED_FEAT(LB_MIN, false)
+SCHED_FEAT(LB_MIN, true)
 
 /*
  * Apply the automatic NUMA scheduling policy. Enabled automatically
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index b6baf370cae..27cd379a754 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -13,6 +13,12 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
+
+static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
+{
+	/* Update rq's load with elapsed idle time */
+	idle_exit(smp_processor_id(), rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
  * Idle tasks are unconditionally rescheduled:
@@ -86,6 +92,7 @@ const struct sched_class idle_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_idle,
+	.pre_schedule		= pre_schedule_idle,
 #endif
 
 	.set_curr_task          = set_curr_task_idle,
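pre_schedule_idle() hooks the point where the scheduler is switching away from the idle task, which is the last chance to fold the just-elapsed idle period into the rq's runnable average before a non-CFS task could run. A minimal sketch of the callback shape (stand-in types; printf stands in for the real statistics update):

#include <stdio.h>

struct rq { unsigned long runnable_avg_period; };

static void update_rq_runnable_avg(struct rq *rq, int runnable)
{
	/* the kernel decays a geometric series here; just note the call */
	printf("idle time accounted, runnable=%d\n", runnable);
}

/* mirrors idle_exit() from the fair.c hunk earlier in this merge */
static void idle_exit(int this_cpu, struct rq *this_rq)
{
	update_rq_runnable_avg(this_rq, 0);
}

static void pre_schedule_idle(struct rq *rq)
{
	idle_exit(0, rq);	/* update rq's load with elapsed idle time */
}

int main(void)
{
	struct rq rq = { 0 };
	pre_schedule_idle(&rq);	/* invoked as idle hands over the CPU */
	return 0;
}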
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cc03cfdf469..01833d81684 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -227,12 +227,6 @@ struct cfs_rq {
 #endif
 
 #ifdef CONFIG_SMP
-/*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
 	 * CFS Load tracking
 	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
@@ -242,8 +236,7 @@
 	u64 runnable_load_avg, blocked_load_avg;
 	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-/* These always depend on CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	u32 tg_runnable_contrib;
 	u64 tg_load_contrib;
@@ -417,7 +410,7 @@ struct rq {
 
 #ifdef CONFIG_SMP
 	struct root_domain *rd;
-	struct sched_domain *sd;
+	struct sched_domain_rq *sd_rq;
 
 	unsigned long cpu_power;
 
@@ -505,21 +498,37 @@ DECLARE_PER_CPU(struct rq, runqueues);
 
 #ifdef CONFIG_SMP
 
-#define rcu_dereference_check_sched_domain(p) \
+#define rcu_dereference_check_sched_domain_rq(p) \
 	rcu_dereference_check((p), \
 			      lockdep_is_held(&sched_domains_mutex))
 
+#define get_sched_domain_rq(cpu) \
+	rcu_dereference_check_sched_domain_rq(cpu_rq(cpu)->sd_rq)
+
+#define rcu_dereference_check_sched_domain(cpu) ({		\
+	struct sched_domain_rq *__sd_rq = get_sched_domain_rq(cpu); \
+	struct sched_domain *__sd = NULL;			\
+	if (__sd_rq)						\
+		__sd = __sd_rq->sd;				\
+	__sd;							\
+})
+
+#define sched_rq_flags(sd_rq) (&sd_rq->flags)
+
 /*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * The domain tree (rq->sd_rq) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
  *
  * The domain tree of any CPU may only be accessed from within
  * preempt-disabled sections.
  */
 #define for_each_domain(cpu, __sd) \
-	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
+	for (__sd = rcu_dereference_check_sched_domain(cpu); \
 	     __sd; __sd = __sd->parent)
 
+#define for_each_domain_from_rq(sd_rq, __sd) \
+	for (__sd = sd_rq->sd; __sd; __sd = __sd->parent)
+
 #define for_each_lower_domain(sd) for (; sd; sd = sd->child)
@@ -879,6 +888,8 @@ extern const struct sched_class idle_sched_class;
 
 extern void trigger_load_balance(struct rq *rq, int cpu);
 extern void idle_balance(int this_cpu, struct rq *this_rq);
+extern void idle_exit(int this_cpu, struct rq *this_rq);
+extern void update_packing_domain(int cpu);
 
 #else	/* CONFIG_SMP */
 
@@ -886,6 +897,14 @@ static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
 
+static inline void idle_exit(int this_cpu, struct rq *this_rq)
+{
+}
+
+static inline void update_packing_domain(int cpu)
+{
+}
+
 #endif
 
 extern void sysrq_sched_debug_show(void);
diff --git a/linaro/configs/big-LITTLE-MP.conf b/linaro/configs/big-LITTLE-MP.conf
new file mode 100644
index 00000000000..80bf45fa6e2
--- /dev/null
+++ b/linaro/configs/big-LITTLE-MP.conf
@@ -0,0 +1,5 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_NO_HZ=y
+CONFIG_SCHED_MC=y
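The new config fragment is meant to be merged on top of a board defconfig rather than used alone. Assuming the kernel's standard merge helper and an example base config (the defconfig path is illustrative), an invocation would look like:

	scripts/kconfig/merge_config.sh -m arch/arm/configs/vexpress_defconfig \
		linaro/configs/big-LITTLE-MP.conf

CONFIG_NO_HZ=y enables the nohz idle-balance paths the packing code hooks, CONFIG_SCHED_MC=y activates the new SD_MC_INIT level on ARM, and the cgroup options exercise the group-scheduling paths this merge touches.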