summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin Schneider <valentin.schneider@arm.com>2021-01-13 18:31:41 +0000
committerPeter Zijlstra <peterz@infradead.org>2021-01-22 15:09:41 +0100
commit36c6e17bf16922935a5a0dd073d5b032d34aa73d (patch)
tree4f20b106f92b74f80d3b033512ef58ffad252ffe
parent9c7d9017a49fb8516c13b7bff59b7da2abed23e1 (diff)
sched/core: Print out straggler tasks in sched_cpu_dying()
Since commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug") tasks are expected to move themselves out of a out-going CPU. For most tasks this will be done automagically via BALANCE_PUSH, but percpu kthreads will have to cooperate and move themselves away one way or another. Currently, some percpu kthreads (workqueues being a notable exemple) do not cooperate nicely and can end up on an out-going CPU at the time sched_cpu_dying() is invoked. Print the dying rq's tasks to shed some light on the stragglers. Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Valentin Schneider <valentin.schneider@arm.com> Tested-by: Valentin Schneider <valentin.schneider@arm.com> Link: https://lkml.kernel.org/r/20210113183141.11974-1-valentin.schneider@arm.com
-rw-r--r--kernel/sched/core.c24
1 files changed, 23 insertions, 1 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 15d2562118d1..627534fa9196 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7574,6 +7574,25 @@ static void calc_load_migrate(struct rq *rq)
atomic_long_add(delta, &calc_load_tasks);
}
+static void dump_rq_tasks(struct rq *rq, const char *loglvl)
+{
+ struct task_struct *g, *p;
+ int cpu = cpu_of(rq);
+
+ lockdep_assert_held(&rq->lock);
+
+ printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running);
+ for_each_process_thread(g, p) {
+ if (task_cpu(p) != cpu)
+ continue;
+
+ if (!task_on_rq_queued(p))
+ continue;
+
+ printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm);
+ }
+}
+
int sched_cpu_dying(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -7583,7 +7602,10 @@ int sched_cpu_dying(unsigned int cpu)
sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
+ if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) {
+ WARN(true, "Dying CPU not properly vacated!");
+ dump_rq_tasks(rq, KERN_WARNING);
+ }
rq_unlock_irqrestore(rq, &rf);
calc_load_migrate(rq);