aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c5
-rw-r--r--kernel/sched.c328
-rw-r--r--kernel/sched_fair.c155
-rw-r--r--kernel/sched_features.h1
-rw-r--r--kernel/sched_idletask.c6
-rw-r--r--kernel/sched_rt.c7
-rw-r--r--kernel/user.c4
7 files changed, 237 insertions, 269 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f17e9854c24..9e7ebde1331 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
struct take_cpu_down_param *param = _param;
int err;
- raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
- param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
diff --git a/kernel/sched.c b/kernel/sched.c
index 13dd2db9fb2..669c49aa57f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
}
+static inline int rt_bandwidth_enabled(void)
+{
+ return sysctl_sched_rt_runtime >= 0;
+}
+
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
ktime_t now;
- if (rt_b->rt_runtime == RUNTIME_INF)
+ if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
return;
if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
#endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
@@ -604,9 +609,9 @@ struct rq {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
{
- rq->curr->sched_class->check_preempt_curr(rq, p);
+ rq->curr->sched_class->check_preempt_curr(rq, p, sync);
}
static inline int cpu_of(struct rq *rq)
@@ -1102,7 +1107,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
}
-static void init_hrtick(void)
+static inline void init_hrtick(void)
{
}
#endif /* CONFIG_SMP */
@@ -1121,7 +1126,7 @@ static void init_rq_hrtick(struct rq *rq)
rq->hrtick_timer.function = hrtick;
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
}
-#else
+#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
{
}
@@ -1133,7 +1138,7 @@ static inline void init_rq_hrtick(struct rq *rq)
static inline void init_hrtick(void)
{
}
-#endif
+#endif /* CONFIG_SCHED_HRTICK */
/*
* resched_task - mark a task 'to be rescheduled now'.
@@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
update_load_sub(&rq->load, load);
}
-#ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
-
- if (rq->nr_running)
- rq->avg_load_per_task = rq->load.weight / rq->nr_running;
-
- return rq->avg_load_per_task;
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
/*
* Iterate the full tree, calling @down when first entering a node and @up when
* leaving it for the final time.
*/
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
struct task_group *parent, *child;
+ int ret;
rcu_read_lock();
parent = &root_task_group;
down:
- (*down)(parent, cpu, sd);
+ ret = (*down)(parent, data);
+ if (ret)
+ goto out_unlock;
list_for_each_entry_rcu(child, &parent->children, siblings) {
parent = child;
goto down;
@@ -1419,15 +1410,43 @@ down:
up:
continue;
}
- (*up)(parent, cpu, sd);
+ ret = (*up)(parent, data);
+ if (ret)
+ goto out_unlock;
child = parent;
parent = parent->parent;
if (parent)
goto up;
+out_unlock:
rcu_read_unlock();
+
+ return ret;
}
+static int tg_nop(struct task_group *tg, void *data)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ if (rq->nr_running)
+ rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+ return rq->avg_load_per_task;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
/*
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
* This needs to be done in a bottom-up fashion because the rq weight of a
* parent group depends on the shares of its child groups.
*/
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
{
unsigned long rq_weight = 0;
unsigned long shares = 0;
+ struct sched_domain *sd = data;
int i;
for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
__update_group_shares_cpu(tg, i, shares, rq_weight);
spin_unlock_irqrestore(&rq->lock, flags);
}
+
+ return 0;
}
/*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
* This needs to be done in a top-down fashion because the load of a child
* group is a fraction of its parents load.
*/
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
{
unsigned long load;
+ long cpu = (long)data;
if (!tg->parent) {
load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
}
tg->cfs_rq[cpu]->h_load = load;
-}
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+ return 0;
}
static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
sd->last_update = now;
- walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+ walk_tg_tree(tg_nop, tg_shares_up, sd);
}
}
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
spin_lock(&rq->lock);
}
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
{
- walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+ walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
}
#else
@@ -1921,11 +1939,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
running = task_running(rq, p);
on_rq = p->se.on_rq;
ncsw = 0;
- if (!match_state || p->state == match_state) {
- ncsw = p->nivcsw + p->nvcsw;
- if (unlikely(!ncsw))
- ncsw = 1;
- }
+ if (!match_state || p->state == match_state)
+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, &flags);
/*
@@ -2285,7 +2300,7 @@ out_running:
trace_mark(kernel_sched_wakeup,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
@@ -2420,7 +2435,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
trace_mark(kernel_sched_wakeup_new,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, 0);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
@@ -2880,7 +2895,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
* Note that idle threads have a prio of MAX_PRIO, for this test
* to be always true for them.
*/
- check_preempt_curr(this_rq, p);
+ check_preempt_curr(this_rq, p, 0);
}
/*
@@ -4627,6 +4642,15 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
+/**
+ * complete: - signals a single thread waiting on this completion
+ * @x: holds the state of this particular completion
+ *
+ * This will wake up a single thread waiting on this completion. Threads will be
+ * awakened in the same order in which they were queued.
+ *
+ * See also complete_all(), wait_for_completion() and related routines.
+ */
void complete(struct completion *x)
{
unsigned long flags;
@@ -4638,6 +4662,12 @@ void complete(struct completion *x)
}
EXPORT_SYMBOL(complete);
+/**
+ * complete_all: - signals all threads waiting on this completion
+ * @x: holds the state of this particular completion
+ *
+ * This will wake up all threads waiting on this particular completion event.
+ */
void complete_all(struct completion *x)
{
unsigned long flags;
@@ -4658,10 +4688,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
wait.flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue_tail(&x->wait, &wait);
do {
- if ((state == TASK_INTERRUPTIBLE &&
- signal_pending(current)) ||
- (state == TASK_KILLABLE &&
- fatal_signal_pending(current))) {
+ if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
@@ -4689,12 +4716,31 @@ wait_for_common(struct completion *x, long timeout, int state)
return timeout;
}
+/**
+ * wait_for_completion: - waits for completion of a task
+ * @x: holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout.
+ *
+ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
+ * and interrupt capability. Also see complete().
+ */
void __sched wait_for_completion(struct completion *x)
{
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
+/**
+ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+ * @x: holds the state of this particular completion
+ * @timeout: timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible.
+ */
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
@@ -4702,6 +4748,13 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
}
EXPORT_SYMBOL(wait_for_completion_timeout);
+/**
+ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+ * @x: holds the state of this particular completion
+ *
+ * This waits for completion of a specific task to be signaled. It is
+ * interruptible.
+ */
int __sched wait_for_completion_interruptible(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
@@ -4711,6 +4764,14 @@ int __sched wait_for_completion_interruptible(struct completion *x)
}
EXPORT_SYMBOL(wait_for_completion_interruptible);
+/**
+ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+ * @x: holds the state of this particular completion
+ * @timeout: timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ */
unsigned long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
unsigned long timeout)
@@ -4719,6 +4780,13 @@ wait_for_completion_interruptible_timeout(struct completion *x,
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+/**
+ * wait_for_completion_killable: - waits for completion of a task (killable)
+ * @x: holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It can be
+ * interrupted by a kill signal.
+ */
int __sched wait_for_completion_killable(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
@@ -5121,7 +5189,8 @@ recheck:
* Do not allow realtime tasks into groups that have no runtime
* assigned.
*/
- if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+ if (rt_bandwidth_enabled() && rt_policy(policy) &&
+ task_group(p)->rt_bandwidth.rt_runtime == 0)
return -EPERM;
#endif
@@ -5957,7 +6026,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
set_task_cpu(p, dest_cpu);
if (on_rq) {
activate_task(rq_dest, p, 0);
- check_preempt_curr(rq_dest, p);
+ check_preempt_curr(rq_dest, p, 0);
}
done:
ret = 1;
@@ -8242,20 +8311,25 @@ void __might_sleep(char *file, int line)
#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
- if ((in_atomic() || irqs_disabled()) &&
- system_state == SYSTEM_RUNNING && !oops_in_progress) {
- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
- return;
- prev_jiffy = jiffies;
- printk(KERN_ERR "BUG: sleeping function called from invalid"
- " context at %s:%d\n", file, line);
- printk("in_atomic():%d, irqs_disabled():%d\n",
- in_atomic(), irqs_disabled());
- debug_show_held_locks(current);
- if (irqs_disabled())
- print_irqtrace_events(current);
- dump_stack();
- }
+ if ((!in_atomic() && !irqs_disabled()) ||
+ system_state != SYSTEM_RUNNING || oops_in_progress)
+ return;
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+ return;
+ prev_jiffy = jiffies;
+
+ printk(KERN_ERR
+ "BUG: sleeping function called from invalid context at %s:%d\n",
+ file, line);
+ printk(KERN_ERR
+ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+ in_atomic(), irqs_disabled(),
+ current->pid, current->comm);
+
+ debug_show_held_locks(current);
+ if (irqs_disabled())
+ print_irqtrace_events(current);
+ dump_stack();
#endif
}
EXPORT_SYMBOL(__might_sleep);
@@ -8753,73 +8827,77 @@ static DEFINE_MUTEX(rt_constraints_mutex);
static unsigned long to_ratio(u64 period, u64 runtime)
{
if (runtime == RUNTIME_INF)
- return 1ULL << 16;
+ return 1ULL << 20;
- return div64_u64(runtime << 16, period);
+ return div64_u64(runtime << 20, period);
}
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
{
- struct task_group *tgi, *parent = tg->parent;
- unsigned long total = 0;
+ struct task_struct *g, *p;
- if (!parent) {
- if (global_rt_period() < period)
- return 0;
+ do_each_thread(g, p) {
+ if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+ return 1;
+ } while_each_thread(g, p);
- return to_ratio(period, runtime) <
- to_ratio(global_rt_period(), global_rt_runtime());
- }
+ return 0;
+}
- if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
- return 0;
+struct rt_schedulable_data {
+ struct task_group *tg;
+ u64 rt_period;
+ u64 rt_runtime;
+};
- rcu_read_lock();
- list_for_each_entry_rcu(tgi, &parent->children, siblings) {
- if (tgi == tg)
- continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+ struct rt_schedulable_data *d = data;
+ struct task_group *child;
+ unsigned long total, sum = 0;
+ u64 period, runtime;
- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- tgi->rt_bandwidth.rt_runtime);
+ period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+ runtime = tg->rt_bandwidth.rt_runtime;
+
+ if (tg == d->tg) {
+ period = d->rt_period;
+ runtime = d->rt_runtime;
}
- rcu_read_unlock();
- return total + to_ratio(period, runtime) <=
- to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
- parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
- struct task_group *tgi;
- unsigned long total = 0;
- unsigned long global_ratio =
- to_ratio(global_rt_period(), global_rt_runtime());
+ if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+ return -EBUSY;
- rcu_read_lock();
- list_for_each_entry_rcu(tgi, &task_groups, list) {
- if (tgi == tg)
- continue;
+ total = to_ratio(period, runtime);
+
+ list_for_each_entry_rcu(child, &tg->children, siblings) {
+ period = ktime_to_ns(child->rt_bandwidth.rt_period);
+ runtime = child->rt_bandwidth.rt_runtime;
- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- tgi->rt_bandwidth.rt_runtime);
+ if (child == d->tg) {
+ period = d->rt_period;
+ runtime = d->rt_runtime;
+ }
+
+ sum += to_ratio(period, runtime);
}
- rcu_read_unlock();
- return total + to_ratio(period, runtime) < global_ratio;
+ if (sum > total)
+ return -EINVAL;
+
+ return 0;
}
-#endif
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
- struct task_struct *g, *p;
- do_each_thread(g, p) {
- if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
- return 1;
- } while_each_thread(g, p);
- return 0;
+ struct rt_schedulable_data data = {
+ .tg = tg,
+ .rt_period = period,
+ .rt_runtime = runtime,
+ };
+
+ return walk_tg_tree(tg_schedulable, tg_nop, &data);
}
static int tg_set_bandwidth(struct task_group *tg,
@@ -8829,14 +8907,9 @@ static int tg_set_bandwidth(struct task_group *tg,
mutex_lock(&rt_constraints_mutex);
read_lock(&tasklist_lock);
- if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
- err = -EBUSY;
- goto unlock;
- }
- if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
- err = -EINVAL;
+ err = __rt_schedulable(tg, rt_period, rt_runtime);
+ if (err)
goto unlock;
- }
spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8916,8 +8989,9 @@ static int sched_rt_global_constraints(void)
rt_runtime = tg->rt_bandwidth.rt_runtime;
mutex_lock(&rt_constraints_mutex);
- if (!__rt_schedulable(tg, rt_period, rt_runtime))
- ret = -EINVAL;
+ read_lock(&tasklist_lock);
+ ret = __rt_schedulable(tg, rt_period, rt_runtime);
+ read_unlock(&tasklist_lock);
mutex_unlock(&rt_constraints_mutex);
return ret;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4b..c2089976345 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -409,64 +409,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
/*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- * -20 |
- * |
- * 0 --------+-------
- * .'
- * 19 .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
- struct load_weight lw = {
- .weight = NICE_0_LOAD,
- .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
- };
-
- for_each_sched_entity(se) {
- struct load_weight *se_lw = &se->load;
- unsigned long rw = cfs_rq_of(se)->load.weight;
-
-#ifdef CONFIG_FAIR_SCHED_GROUP
- struct cfs_rq *cfs_rq = se->my_q;
- struct task_group *tg = NULL
-
- if (cfs_rq)
- tg = cfs_rq->tg;
-
- if (tg && tg->shares < NICE_0_LOAD) {
- /*
- * scale shares to what it would have been had
- * tg->weight been NICE_0_LOAD:
- *
- * weight = 1024 * shares / tg->weight
- */
- lw.weight *= se->load.weight;
- lw.weight /= tg->shares;
-
- lw.inv_weight = 0;
-
- se_lw = &lw;
- rw += lw.weight - se->load.weight;
- } else
-#endif
-
- if (se->load.weight < NICE_0_LOAD) {
- se_lw = &lw;
- rw += NICE_0_LOAD - se->load.weight;
- }
-
- delta = calc_delta_mine(delta, rw, se_lw);
- }
-
- return delta;
-}
-
-/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
@@ -1281,62 +1223,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
* + nice tasks.
*/
if (sched_feat(ASYM_GRAN))
- gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
- else
- gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+ gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
return gran;
}
/*
- * Should 'se' preempt 'curr'.
- *
- * |s1
- * |s2
- * |s3
- * g
- * |<--->|c
- *
- * w(c, s1) = -1
- * w(c, s2) = 0
- * w(c, s3) = 1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
- s64 gran, vdiff = curr->vruntime - se->vruntime;
-
- if (vdiff < 0)
- return -1;
-
- gran = wakeup_gran(curr);
- if (vdiff > gran)
- return 1;
-
- return 0;
-}
-
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
- int depth = 0;
-
- for_each_sched_entity(se)
- depth++;
-
- return depth;
-}
-
-/*
* Preempt the current task with a newly woken task if needed:
*/
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
{
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;
- int se_depth, pse_depth;
+ s64 delta_exec;
if (unlikely(rt_prio(p->prio))) {
update_rq_clock(rq);
@@ -1348,6 +1248,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
if (unlikely(se == pse))
return;
+ /*
+ * We can come here with TIF_NEED_RESCHED already set from new task
+ * wake up path.
+ */
+ if (test_tsk_need_resched(curr))
+ return;
+
cfs_rq_of(pse)->next = pse;
/*
@@ -1360,33 +1267,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
if (!sched_feat(WAKEUP_PREEMPT))
return;
- /*
- * preemption test can be made between sibling entities who are in the
- * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
- * both tasks until we find their ancestors who are siblings of common
- * parent.
- */
-
- /* First walk up until both entities are at same depth */
- se_depth = depth_se(se);
- pse_depth = depth_se(pse);
-
- while (se_depth > pse_depth) {
- se_depth--;
- se = parent_entity(se);
- }
-
- while (pse_depth > se_depth) {
- pse_depth--;
- pse = parent_entity(pse);
- }
-
- while (!is_same_group(se, pse)) {
- se = parent_entity(se);
- pse = parent_entity(pse);
+ if (sched_feat(WAKEUP_OVERLAP) && sync &&
+ se->avg_overlap < sysctl_sched_migration_cost &&
+ pse->avg_overlap < sysctl_sched_migration_cost) {
+ resched_task(curr);
+ return;
}
- if (wakeup_preempt_entity(se, pse) == 1)
+ delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+ if (delta_exec > wakeup_gran(pse))
resched_task(curr);
}
@@ -1451,7 +1340,7 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
next = next->next;
} while (next != &cfs_rq->tasks && !entity_is_task(se));
- if (next == &cfs_rq->tasks)
+ if (next == &cfs_rq->tasks && !entity_is_task(se))
return NULL;
cfs_rq->balance_iterator = next;
@@ -1507,7 +1396,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
rcu_read_lock();
update_h_load(busiest_cpu);
- list_for_each_entry(tg, &task_groups, list) {
+ list_for_each_entry_rcu(tg, &task_groups, list) {
struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
unsigned long busiest_h_load = busiest_cfs_rq->h_load;
unsigned long busiest_weight = busiest_cfs_rq->load.weight;
@@ -1620,10 +1509,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
+ resched_task(rq->curr);
}
enqueue_task_fair(rq, p, 0);
- resched_task(rq->curr);
}
/*
@@ -1642,7 +1531,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
if (p->prio > oldprio)
resched_task(rq->curr);
} else
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, 0);
}
/*
@@ -1659,7 +1548,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
if (running)
resched_task(rq->curr);
else
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, 0);
}
/* Account for a task changing its policy or group.
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 9353ca78154..7c9e8f4a049 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
+SCHED_FEAT(WAKEUP_OVERLAP, 0)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 3a4f92dbbe6..dec4ccabe2f 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
/*
* Idle tasks are unconditionally rescheduled:
*/
-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
{
resched_task(rq->idle);
}
@@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p,
if (running)
resched_task(rq->curr);
else
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, 0);
}
static void prio_changed_idle(struct rq *rq, struct task_struct *p,
@@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
if (p->prio > oldprio)
resched_task(rq->curr);
} else
- check_preempt_curr(rq, p);
+ check_preempt_curr(rq, p, 0);
}
/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1113157b205..2e228bd5395 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -389,7 +389,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
int i, idle = 1;
cpumask_t span;
- if (rt_b->rt_runtime == RUNTIME_INF)
+ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return 1;
span = sched_rt_period_mask();
@@ -487,6 +487,9 @@ static void update_curr_rt(struct rq *rq)
curr->se.exec_start = rq->clock;
cpuacct_charge(curr, delta_exec);
+ if (!rt_bandwidth_enabled())
+ return;
+
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
@@ -784,7 +787,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
/*
* Preempt the current task with a newly woken task if needed:
*/
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
{
if (p->prio < rq->curr->prio) {
resched_task(rq->curr);
diff --git a/kernel/user.c b/kernel/user.c
index 865ecf57a09..39d6159fae4 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
- return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+ return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
}
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
unsigned long rt_runtime;
int rc;
- sscanf(buf, "%lu", &rt_runtime);
+ sscanf(buf, "%ld", &rt_runtime);
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);