author		Vincent Guittot <vincent.guittot@linaro.org>	2020-07-10 17:24:26 +0200
committer	Vincent Guittot <vincent.guittot@linaro.org>	2020-07-17 15:24:18 +0200
commit		1936106bd8033b9deb0e5a743adcf1f75674dde3 (patch)
tree		ad2da07510c7c6f96cb1091eba653e789695c7a4
parent		c57b1153a58a6263863667296b5f00933fc46a4f (diff)
sched/fair: handle case of task_h_load() returning 0	(sched/backport-h_load-v5.4.51)
[ Upstream commit a61fa2799ef9bf6c4f54cf7295036577cececc72 ]

task_h_load() can return 0 in some situations, such as when running
stress-ng mmapfork, which forks thousands of threads, in a sched group
on a 224-core system. The load balancer doesn't handle this correctly:
env->imbalance never decreases, so it stops pulling tasks only after
reaching loop_max, which can be equal to the number of running tasks
of the cfs_rq. Make sure that imbalance is decreased by at least 1.

Misfit task handling is the other feature that doesn't handle this
situation correctly, although the problem is probably harder to hit
there because of the smaller number of CPUs and running tasks on
heterogeneous systems.

We can't simply ensure that task_h_load() returns at least 1, because
that would imply handling underflow in other places.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: <stable@vger.kernel.org> # v5.4
Link: https://lkml.kernel.org/r/20200710152426.16981-1-vincent.guittot@linaro.org
-rw-r--r--	kernel/sched/fair.c	16	++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
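
Before the diff itself, a quick illustration of the failure mode the
message describes. This is a stand-alone sketch, not kernel code:
sim_detach(), max_ul() and LOOP_MAX are invented names that model
detach_tasks(), max_t() and env->loop_max under simplified rules.

/* sketch.c: model of why a zero task_h_load() stalls detach_tasks() */
#include <stdio.h>

#define LOOP_MAX 32	/* stand-in for env->loop_max */

/* mirrors the effect of max_t(unsigned long, a, b) */
static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

/* returns how many "tasks" get detached for a given imbalance */
static int sim_detach(const unsigned long *h_load, int nr,
		      unsigned long imbalance, int clamp)
{
	int detached = 0;

	for (int i = 0; i < nr && i < LOOP_MAX && imbalance > 0; i++) {
		unsigned long load = h_load[i];

		if (clamp)
			load = max_ul(load, 1);	/* the fix: guarantee progress */

		if (load > imbalance)	/* simplified stop condition */
			break;

		imbalance -= load;	/* never shrinks when load == 0 */
		detached++;
	}
	return detached;
}

int main(void)
{
	unsigned long h_load[8] = { 0 };	/* task_h_load() == 0 for all */

	printf("without clamp: %d detached\n", sim_detach(h_load, 8, 4, 0));
	printf("with clamp:    %d detached\n", sim_detach(h_load, 8, 4, 1));
	return 0;
}

Compiled as C99, this prints 8 without the clamp (the loop keeps
pulling until it runs out of tasks or hits LOOP_MAX, even though the
imbalance of 4 is never reduced) and 4 with it, matching the behaviour
the commit message describes.
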
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2f81e4ae844e..9b16080093be 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3824,7 +3824,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 		return;
 	}
 
-	rq->misfit_task_load = task_h_load(p);
+	/*
+	 * Make sure that misfit_task_load will not be null even if
+	 * task_h_load() returns 0.
+	 */
+	rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
 }
 
 #else /* CONFIG_SMP */
@@ -7407,7 +7411,15 @@ static int detach_tasks(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			goto next;
 
-		load = task_h_load(p);
+		/*
+		 * Depending on the number of CPUs and tasks and the
+		 * cgroup hierarchy, task_h_load() can return a null
+		 * value. Make sure that env->imbalance decreases,
+		 * otherwise detach_tasks() will stop only after
+		 * detaching up to loop_max tasks.
+		 */
+		load = max_t(unsigned long, task_h_load(p), 1);
+
 		if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
 			goto next;
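
The first hunk applies the same clamp so that rq->misfit_task_load
stays non-zero. A minimal user-space mirror of the pattern (max_t()
here is a simplified stand-in; the real kernel macro also enforces
the type of both operands):

#include <assert.h>

/* simplified stand-in for the kernel's max_t(type, x, y) */
#define max_t(type, x, y) ((type)(x) > (type)(y) ? (type)(x) : (type)(y))

int main(void)
{
	unsigned long h_load = 0;	/* task_h_load() returning 0 */

	/* after the patch, a zero h-load is still reported as 1 */
	assert(max_t(unsigned long, h_load, 1) == 1);
	return 0;
}

Note the placement of the fix: the clamp sits at the two call sites
rather than inside task_h_load() itself because, as the commit message
explains, forcing task_h_load() to return at least 1 would require
handling underflow wherever its result is subtracted.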