diff options
author | Vincent Guittot <vincent.guittot@linaro.org> | 2022-11-08 16:21:20 +0100 |
---|---|---|
committer | Vincent Guittot <vincent.guittot@linaro.org> | 2022-11-09 12:13:35 +0100 |
commit | f864e961417814724e7f273055d58e8a855a0521 (patch) | |
tree | a4e3ebf3256e240cb2ea52b621f43ce48ac2d369 | |
parent | 2f7003400273ea7ac4741bb228b84073e7b1b829 (diff) |
sched/fair: Use augmented rbtree instead of 2 rbtrees
sched/latency-nice
Use augmented rbtree to save a wakeup_vruntime for the latency sensitive
tasks in addition to the normal vruntime instead of using a dedicated
rbtree.
A wakeup_vruntime field is added on sched_entity that will be set at enqueue
to take into account the latency offset when needed.
A min_vruntime field is also added to the sched_entity for the augmented
function to keep track of the minimum wakeup_vruntime of the rb subtree of a
node.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | kernel/sched/core.c | 1 | ||||
-rw-r--r-- | kernel/sched/fair.c | 168 | ||||
-rw-r--r-- | kernel/sched/sched.h | 1 |
4 files changed, 118 insertions, 55 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 45b8c36e64cc..a7e32c33c88c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,7 +547,6 @@ struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; - struct rb_node latency_node; struct list_head group_node; unsigned int on_rq; @@ -555,6 +554,8 @@ struct sched_entity { u64 sum_exec_runtime; u64 vruntime; u64 prev_sum_exec_runtime; + u64 wakeup_vruntime; + u64 min_vruntime; /* rb augmented field */ u64 nr_migrations; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c6c67677d71f..3f42b1f61a7e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4342,7 +4342,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.nr_migrations = 0; p->se.vruntime = 0; INIT_LIST_HEAD(&p->se.group_node); - RB_CLEAR_NODE(&p->se.latency_node); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = NULL; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2c75d531612..0bd8c7b94149 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -50,6 +50,7 @@ #include <asm/switch_to.h> #include <linux/sched/cond_resched.h> +#include <linux/rbtree_augmented.h> #include "sched.h" #include "stats.h" @@ -616,22 +617,74 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) max_vruntime(cfs_rq->min_vruntime, vruntime)); } -static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) +static inline bool wakeup_preempt_vruntime(struct sched_entity *se, bool exit) { - return entity_before(__node_2_se(a), __node_2_se(b)); + struct sched_entity *child; + u64 min = se->wakeup_vruntime; + if (se->run_node.rb_left) { + child = rb_entry(se->run_node.rb_left, struct sched_entity, run_node); + if (child->min_vruntime < min) + min = child->min_vruntime; + } + if (se->run_node.rb_right) { + child = rb_entry(se->run_node.rb_right, struct sched_entity, run_node); + if (child->min_vruntime < min) + min = 
child->min_vruntime; + } + if (exit && se->min_vruntime == min) + return true; + se->min_vruntime = min; + return false; } +RB_DECLARE_CALLBACKS(static, wakeup_preempt_augmented, + struct sched_entity, run_node, min_vruntime, wakeup_preempt_vruntime) + /* * Enqueue an entity into the rb-tree: */ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less); + struct rb_root_cached *tree = &cfs_rq->tasks_timeline; + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *node = &se->run_node; + struct rb_node *rb_parent = NULL; + struct sched_entity *parent; + bool leftmost = true; + + /* + * Because of the augmented function, we can't use directly + * rb_add_cached(). + */ + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, struct sched_entity, run_node); + + /* + * Update parent augmented field to keep track of the minimum wakeup + * vruntime of the subtree. + */ + if (se->wakeup_vruntime < parent->min_vruntime) + parent->min_vruntime = se->wakeup_vruntime; + + if (entity_before(se, parent)) { + link = &rb_parent->rb_left; + } else { + link = &rb_parent->rb_right; + leftmost = false; + } + } + + se->min_vruntime = se->wakeup_vruntime; + rb_link_node(node, rb_parent, link); + rb_insert_augmented_cached(node, tree, leftmost, + &wakeup_preempt_augmented); } static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); + rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, + &wakeup_preempt_augmented); } struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) @@ -666,44 +719,23 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) } #endif -/************************************************************** - * Scheduling class tree data structure manipulation methods: - * for latency - */ - -static inline bool latency_before(struct sched_entity *a, - 
struct sched_entity *b) -{ - return (s64)(a->vruntime + a->latency_offset - b->vruntime - b->latency_offset) < 0; -} - -#define __latency_node_2_se(node) \ - rb_entry((node), struct sched_entity, latency_node) - -static inline bool __latency_less(struct rb_node *a, const struct rb_node *b) -{ - return latency_before(__latency_node_2_se(a), __latency_node_2_se(b)); -} - /* - * Enqueue an entity into the latency rb-tree: + * Compute the wakeup vruntime that can be used to preempt current. */ -static void __enqueue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +static void __enqueue_latency(struct sched_entity *se, int flags) { + se->wakeup_vruntime = se->vruntime; - /* Only latency sensitive entity can be added to the list */ + /* Only latency sensitive entity got special preemption vruntime */ if (se->latency_offset >= 0) return; - if (!RB_EMPTY_NODE(&se->latency_node)) - return; - /* * An execution time less than sysctl_sched_min_granularity means that * the entity has been preempted by a higher sched class or an entity * with higher latency constraint. - * Put it back in the list so it gets a chance to run 1st during the - * next slice. + * Keep preemption property for such task so it gets a chance to run + * 1st during the next slice. */ if (!(flags & ENQUEUE_WAKEUP)) { u64 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; @@ -712,25 +744,61 @@ static void __enqueue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se, in return; } - rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less); + /* Waking task gets a wakeup vruntime */ + se->wakeup_vruntime += se->latency_offset; } -static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - if (!RB_EMPTY_NODE(&se->latency_node)) { - rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline); - RB_CLEAR_NODE(&se->latency_node); - } -} +/* + * Search an entity with a wakeup vruntime that matches a vruntime. 
+ */ -static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq, u64 vruntime) { - struct rb_node *left = rb_first_cached(&cfs_rq->latency_timeline); + struct rb_root_cached *tree = &cfs_rq->tasks_timeline; + struct rb_node *node = tree->rb_root.rb_node; + struct sched_entity *se; - if (!left) + if (!node) return NULL; - return __latency_node_2_se(left); + se = rb_entry(node, struct sched_entity, run_node); + + /* There is no pending latency sensitive task */ + if (se->min_vruntime >= vruntime) + return NULL; + + /* There is at least one wakeup vrutime before the target vruntime */ + while (true) { + if (se->run_node.rb_left) { + struct sched_entity *left = rb_entry(se->run_node.rb_left, + struct sched_entity, run_node); + + if (left->min_vruntime <= se->min_vruntime) { + /* left has a lower wakeup_vruntime */ + se = left; + continue; + } + } + + /* This node has the lower wakeup_vruntime */ + if (se->wakeup_vruntime <= se->min_vruntime) + return se; + + if (se->run_node.rb_right) { + struct sched_entity *right = rb_entry(se->run_node.rb_right, + struct sched_entity, run_node); + + if (right->min_vruntime <= se->min_vruntime) { + /* right has a lower wakeup_vruntime */ + se = right; + continue; + } + } + + /* Should never happen */ + SCHED_WARN_ON(true); + return NULL; + } } #ifdef CONFIG_SCHED_DEBUG @@ -4509,8 +4577,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_stats_enqueue_fair(cfs_rq, se, flags); check_spread(cfs_rq, se); if (!curr) { + __enqueue_latency(se, flags); __enqueue_entity(cfs_rq, se); - __enqueue_latency(cfs_rq, se, flags); } se->on_rq = 1; @@ -4597,10 +4665,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) clear_buddies(cfs_rq, se); - if (se != cfs_rq->curr) { + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); - __dequeue_latency(cfs_rq, se); - } + se->on_rq = 0; 
account_entity_dequeue(cfs_rq, se); @@ -4689,7 +4756,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) */ update_stats_wait_end_fair(cfs_rq, se); __dequeue_entity(cfs_rq, se); - __dequeue_latency(cfs_rq, se); update_load_avg(cfs_rq, se, UPDATE_TG); } @@ -4771,7 +4837,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) } /* Check for latency sensitive entity waiting for running */ - latency = __pick_first_latency(cfs_rq); + latency = __pick_first_latency(cfs_rq, left->wakeup_vruntime); if (latency && (latency != se) && wakeup_preempt_entity(latency, se) < 1) se = latency; @@ -4798,8 +4864,8 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) if (prev->on_rq) { update_stats_wait_start_fair(cfs_rq, prev); /* Put 'current' back into the tree. */ + __enqueue_latency(prev, 0); __enqueue_entity(cfs_rq, prev); - __enqueue_latency(cfs_rq, prev, 0); /* in !on_rq case, update occurred at dequeue */ update_load_avg(cfs_rq, prev, 0); } @@ -11763,7 +11829,6 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) void init_cfs_rq(struct cfs_rq *cfs_rq) { cfs_rq->tasks_timeline = RB_ROOT_CACHED; - cfs_rq->latency_timeline = RB_ROOT_CACHED; u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20))); #ifdef CONFIG_SMP raw_spin_lock_init(&cfs_rq->removed.lock); @@ -12077,7 +12142,6 @@ int sched_group_set_latency(struct task_group *tg, s64 latency) rq_lock_irqsave(rq, &rf); - __dequeue_latency(se->cfs_rq, se); WRITE_ONCE(se->latency_offset, latency); rq_unlock_irqrestore(rq, &rf); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 91ec36c1158b..95d4be4f3af6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -599,7 +599,6 @@ struct cfs_rq { #endif struct rb_root_cached tasks_timeline; - struct rb_root_cached latency_timeline; /* * 'curr' points to currently running entity on this cfs_rq. |