From 720a2592cf1b9af86f30c44e8d89348826c03372 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:36 +0100
Subject: hrtimer: more hrtimer_init_sleeper() fallout.

Missed an instance...

  futex_lock_pi()
    hrtimer_init_sleeper()
    rt_mutex_timed_lock()
      rt_mutex_timed_fastlock()
        rt_mutex_slowlock()
          hrtimer_start()

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/rtmutex.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel')
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 0deef71ff8d..6522ae5b14a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	set_current_state(state);
 
 	/* Setup the timer, when timeout != NULL */
-	if (unlikely(timeout))
+	if (unlikely(timeout)) {
 		hrtimer_start(&timeout->timer, timeout->timer.expires,
 			      HRTIMER_MODE_ABS);
+		if (!hrtimer_active(&timeout->timer))
+			timeout->task = NULL;
+	}
 
 	for (;;) {
 		/* Try to acquire the lock: */
-- 
cgit v1.2.3


From 8ed3699682be75fd68281239c202ad3830f9c72d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:39 +0100
Subject: sched: fair-group: separate tg->shares from task_group_lock

On Mon, 2008-02-11 at 15:09 +0300, Denis V. Lunev wrote:
> BUG: sleeping function called from invalid context
> at /home/den/src/linux-netns26/kernel/mutex.c:209
> in_atomic():1, irqs_disabled():0
> no locks held by swapper/0.
> Pid: 0, comm: swapper Not tainted 2.6.24 #304
>
> Call Trace:
>  <IRQ>  [<ffffffff80252d1e>] ? __debug_show_held_locks+0x15/0x27
>  [<ffffffff8022c2a8>] __might_sleep+0xc0/0xdf
>  [<ffffffff8049f1df>] mutex_lock_nested+0x28/0x2a9
>  [<ffffffff80231294>] sched_destroy_group+0x18/0xea
>  [<ffffffff8023e835>] sched_destroy_user+0xd/0xf
>  [<ffffffff8023e8c1>] free_uid+0x8a/0xab
>  [<ffffffff80233e24>] __put_task_struct+0x3f/0xd3
>  [<ffffffff80236708>] delayed_put_task_struct+0x23/0x25
>  [<ffffffff8026fda7>] __rcu_process_callbacks+0x8d/0x215
>  [<ffffffff8026ff52>] rcu_process_callbacks+0x23/0x44
>  [<ffffffff8023a2ae>] __do_softirq+0x79/0xf8
>  [<ffffffff8020f8c3>] ? profile_pc+0x2a/0x67
>  [<ffffffff8020d38c>] call_softirq+0x1c/0x30
>  [<ffffffff8020f689>] do_softirq+0x61/0x9c
>  [<ffffffff8023a233>] irq_exit+0x51/0x53
>  [<ffffffff8021bd1a>] smp_apic_timer_interrupt+0x77/0xad
>  [<ffffffff8020ce3b>] apic_timer_interrupt+0x6b/0x70
>  <EOI>  [<ffffffff8020b0dd>] ? default_idle+0x43/0x76
>  [<ffffffff8020b0db>] ? default_idle+0x41/0x76
>  [<ffffffff8020b09a>] ? default_idle+0x0/0x76
>  [<ffffffff8020b186>] ? cpu_idle+0x76/0x98

separate the tg->shares protection from the task_group lock.

Reported-by: Denis V. Lunev <den@openvz.org>
Tested-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 3eedd526090..6b02276baaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -232,10 +232,10 @@ static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
 
-/* task_group_mutex serializes add/remove of task groups and also changes to
+/* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
  */
-static DEFINE_MUTEX(task_group_mutex);
+static DEFINE_SPINLOCK(task_group_lock);
 
 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);
@@ -295,16 +295,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 	p->rt.parent = task_group(p)->rt_se[cpu];
 }
 
-static inline void lock_task_group_list(void)
-{
-	mutex_lock(&task_group_mutex);
-}
-
-static inline void unlock_task_group_list(void)
-{
-	mutex_unlock(&task_group_mutex);
-}
-
 static inline void lock_doms_cur(void)
 {
 	mutex_lock(&doms_cur_mutex);
@@ -318,8 +308,6 @@ static inline void unlock_doms_cur(void)
 #else
 
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_task_group_list(void) { }
-static inline void unlock_task_group_list(void) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }
 
@@ -7571,6 +7559,7 @@ struct task_group *sched_create_group(void)
 	struct rt_rq *rt_rq;
 	struct sched_rt_entity *rt_se;
 	struct rq *rq;
+	unsigned long flags;
 	int i;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
@@ -7620,7 +7609,7 @@ struct task_group *sched_create_group(void)
 		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
 	}
 
-	lock_task_group_list();
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 		cfs_rq = tg->cfs_rq[i];
@@ -7629,7 +7618,7 @@ struct task_group *sched_create_group(void)
 		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 	}
 	list_add_rcu(&tg->list, &task_groups);
-	unlock_task_group_list();
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	return tg;
 
@@ -7650,9 +7639,10 @@ void sched_destroy_group(struct task_group *tg)
 {
 	struct cfs_rq *cfs_rq = NULL;
 	struct rt_rq *rt_rq = NULL;
+	unsigned long flags;
 	int i;
 
-	lock_task_group_list();
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		cfs_rq = tg->cfs_rq[i];
 		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
@@ -7660,7 +7650,7 @@ void sched_destroy_group(struct task_group *tg)
 		list_del_rcu(&rt_rq->leaf_rt_rq_list);
 	}
 	list_del_rcu(&tg->list);
-	unlock_task_group_list();
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	BUG_ON(!cfs_rq);
 
@@ -7728,13 +7718,16 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
 	}
 }
 
+static DEFINE_MUTEX(shares_mutex);
+
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
 	struct cfs_rq *cfs_rq;
 	struct rq *rq;
+	unsigned long flags;
 
-	lock_task_group_list();
+	mutex_lock(&shares_mutex);
 	if (tg->shares == shares)
 		goto done;
 
@@ -7746,10 +7739,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * load_balance_fair) from referring to this group first,
 	 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
 	 */
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		cfs_rq = tg->cfs_rq[i];
 		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
 	}
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	/* wait for any ongoing reference to this group to finish */
 	synchronize_sched();
@@ -7769,13 +7764,15 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * Enable load balance activity on this group, by inserting it back on
 	 * each cpu's rq->leaf_cfs_rq_list.
 	 */
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 		cfs_rq = tg->cfs_rq[i];
 		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 	}
+	spin_unlock_irqrestore(&task_group_lock, flags);
 done:
-	unlock_task_group_list();
+	mutex_unlock(&shares_mutex);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 4cf5d77a6eefaa7a464bc34e8cb767356f10fd74 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:39 +0100
Subject: sched: fix incorrect irq lock usage in normalize_rt_tasks()

lockdep spotted this bogus irq locking. normalize_rt_tasks() can be called
from hardirq context through sysrq-n

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 6b02276baaa..88a17c7128c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7291,7 +7291,7 @@ void normalize_rt_tasks(void)
 	unsigned long flags;
 	struct rq *rq;
 
-	read_lock_irq(&tasklist_lock);
+	read_lock_irqsave(&tasklist_lock, flags);
 	do_each_thread(g, p) {
 		/*
 		 * Only normalize user tasks:
@@ -7317,16 +7317,16 @@ void normalize_rt_tasks(void)
 			continue;
 		}
 
-		spin_lock_irqsave(&p->pi_lock, flags);
+		spin_lock(&p->pi_lock);
 		rq = __task_rq_lock(p);
 
 		normalize_task(rq, p);
 
 		__task_rq_unlock(rq);
-		spin_unlock_irqrestore(&p->pi_lock, flags);
+		spin_unlock(&p->pi_lock);
 	} while_each_thread(g, p);
 
-	read_unlock_irq(&tasklist_lock);
+	read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
 #endif /* CONFIG_MAGIC_SYSRQ */
-- 
cgit v1.2.3


From 23b0fdfc9299b137bd126e9dc22f62a59dae546d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:39 +0100
Subject: sched: rt-group: deal with PI

Steven mentioned the fun case where a lock holding task will be throttled.

Simple fix: allow groups that have boosted tasks to run anyway.

If a runnable task in a throttled group gets boosted the dequeue/enqueue
done by rt_mutex_setprio() is enough to unthrottle the group.

This is ofcourse not quite correct. Two possible ways forward are:
  - second prio array for boosted tasks
  - boost to a prio ceiling (this would also work for deadline scheduling)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c    |  3 +++
 kernel/sched_rt.c | 43 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 88a17c7128c..cecaea67ae9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -362,6 +362,8 @@ struct rt_rq {
 	u64 rt_time;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
 	struct rq *rq;
 	struct list_head leaf_rt_rq_list;
 	struct task_group *tg;
@@ -7112,6 +7114,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	rt_rq->rt_throttled = 0;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 #endif
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 274b40d7bef..8d426938123 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -110,6 +110,23 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
 		dequeue_rt_entity(rt_se);
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
+}
+
+static int rt_se_boosted(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+	struct task_struct *p;
+
+	if (rt_rq)
+		return !!rt_rq->rt_nr_boosted;
+
+	p = rt_task_of(rt_se);
+	return p->prio != p->normal_prio;
+}
+
 #else
 
 static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
@@ -149,6 +166,10 @@ static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
 {
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled;
+}
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -172,7 +193,7 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
 		return 0;
 
 	if (rt_rq->rt_throttled)
-		return 1;
+		return rt_rq_throttled(rt_rq);
 
 	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
 	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
@@ -183,8 +204,10 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
 
-		sched_rt_ratio_dequeue(rt_rq);
-		return 1;
+		if (rt_rq_throttled(rt_rq)) {
+			sched_rt_ratio_dequeue(rt_rq);
+			return 1;
+		}
 	}
 
 	return 0;
@@ -265,6 +288,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted++;
+#endif
 }
 
 static inline
@@ -295,6 +322,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted--;
+
+	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
+#endif
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +336,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
 
-	if (group_rq && group_rq->rt_throttled)
+	if (group_rq && rt_rq_throttled(group_rq))
 		return;
 
 	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -496,7 +529,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	if (unlikely(!rt_rq->rt_nr_running))
 		return NULL;
 
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (rt_rq_throttled(rt_rq))
 		return NULL;
 
 	do {
-- 
cgit v1.2.3


From 9f0c1e560c43327b70998e6c702b2f01321130d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:39 +0100
Subject: sched: rt-group: interface

Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.

Extend the /sys/kernel/uids/<uid>/ interface to allow setting
the group's rt_runtime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c    | 141 ++++++++++++++++++++++++++++++++++++++++++------------
 kernel/sched_rt.c |  53 +++++++++-----------
 kernel/sysctl.c   |  32 ++++++-------
 kernel/user.c     |  28 +++++++++++
 4 files changed, 178 insertions(+), 76 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index cecaea67ae9..85a5fbff2b0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,7 +176,7 @@ struct task_group {
 	struct sched_rt_entity **rt_se;
 	struct rt_rq **rt_rq;
 
-	unsigned int rt_ratio;
+	u64 rt_runtime;
 
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
@@ -642,19 +642,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+unsigned int sysctl_sched_rt_period = 1000000;
 
-#define SCHED_RT_FRAC_SHIFT	16
-#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;
 
 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * single value that denotes runtime == period, ie unlimited time.
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+#define RUNTIME_INF	((u64)~0ULL)
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7187,7 +7189,8 @@ void __init sched_init(void)
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
-		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+		init_task_group.rt_runtime =
+			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(rq, &init_task_group,
 				&per_cpu(init_rt_rq, i),
@@ -7583,7 +7586,7 @@ struct task_group *sched_create_group(void)
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
-	tg->rt_ratio = 0; /* XXX */
+	tg->rt_runtime = 0;
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
@@ -7785,30 +7788,76 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+	if (runtime == RUNTIME_INF)
+		return 1ULL << 16;
+
+	runtime *= (1ULL << 16);
+	div64_64(runtime, period);
+	return runtime;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
 	struct task_group *tgi;
 	unsigned long total = 0;
+	unsigned long global_ratio =
+		to_ratio(sysctl_sched_rt_period,
+			 sysctl_sched_rt_runtime < 0 ?
+				RUNTIME_INF : sysctl_sched_rt_runtime);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list)
-		total += tgi->rt_ratio;
-	rcu_read_unlock();
+	list_for_each_entry_rcu(tgi, &task_groups, list) {
+		if (tgi == tg)
+			continue;
 
-	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-		return -EINVAL;
+		total += to_ratio(period, tgi->rt_runtime);
+	}
+	rcu_read_unlock();
 
-	tg->rt_ratio = rt_ratio;
-	return 0;
+	return total + to_ratio(period, runtime) < global_ratio;
 }
 
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-	return tg->rt_ratio;
+	u64 rt_runtime, rt_period;
+	int err = 0;
+
+	rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us == -1)
+		rt_runtime = rt_period;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (rt_runtime_us == -1)
+		rt_runtime = RUNTIME_INF;
+	tg->rt_runtime = rt_runtime;
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
+
+	if (tg->rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -7884,17 +7933,49 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 	return (u64) tg->shares;
 }
 
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_ratio_val)
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+				struct file *file,
+				const char __user *userbuf,
+				size_t nbytes, loff_t *unused_ppos)
 {
-	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+	char buffer[64];
+	int retval = 0;
+	s64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoll(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
 }
 
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	char tmp[64];
+	long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+	int len = sprintf(tmp, "%ld\n", val);
 
-	return (u64) tg->rt_ratio;
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
 static struct cftype cpu_files[] = {
@@ -7904,9 +7985,9 @@ static struct cftype cpu_files[] = {
 		.write_uint = cpu_shares_write_uint,
 	},
 	{
-		.name = "rt_ratio",
-		.read_uint = cpu_rt_ratio_read_uint,
-		.write_uint = cpu_rt_ratio_write_uint,
+		.name = "rt_runtime_us",
+		.read = cpu_rt_runtime_read,
+		.write = cpu_rt_runtime_write,
 	},
 };
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8d426938123..35825b28e42 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
 	if (!rt_rq->tg)
-		return SCHED_RT_FRAC;
+		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_ratio;
+	return rt_rq->tg->rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
 	}
 }
 
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -129,9 +129,12 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 
 #else
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return sysctl_sched_rt_ratio;
+	if (sysctl_sched_rt_runtime == -1)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -158,11 +161,11 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 	return NULL;
 }
 
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 }
 
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 }
 
@@ -184,28 +187,24 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
-	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
-	u64 period, ratio;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (rt_ratio == SCHED_RT_FRAC)
+	if (runtime == RUNTIME_INF)
 		return 0;
 
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
-	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-	if (rt_rq->rt_time > ratio) {
+	if (rt_rq->rt_time > runtime) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
 
 		if (rt_rq_throttled(rt_rq)) {
-			sched_rt_ratio_dequeue(rt_rq);
+			sched_rt_rq_dequeue(rt_rq);
 			return 1;
 		}
 	}
@@ -219,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq)
 	u64 period;
 
 	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
 		rq->rt_period_expire += period;
 
 		for_each_leaf_rt_rq(rt_rq, rq) {
-			unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-			u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+			u64 runtime = sched_rt_runtime(rt_rq);
 
-			rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-			if (rt_rq->rt_throttled) {
+			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
-				sched_rt_ratio_enqueue(rt_rq);
+				sched_rt_rq_enqueue(rt_rq);
 			}
 		}
 
@@ -262,12 +260,7 @@ static void update_curr_rt(struct rq *rq)
 	cpuacct_charge(curr, delta_exec);
 
 	rt_rq->rt_time += delta_exec;
-	/*
-	 * might make it a tad more accurate:
-	 *
-	 * update_sched_rt_period(rq);
-	 */
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d41ef6b4cf7..924c674b76e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_period_ms",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_ratio",
-		.data		= &sysctl_sched_rt_ratio,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	{
 		.ctl_name       = CTL_UNNUMBERED,
@@ -346,6 +330,22 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 #endif
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_compat_yield",
diff --git a/kernel/user.c b/kernel/user.c
index 7d7900c5a1f..9f6d471bfd0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -164,9 +164,37 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
 
+static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   char *buf)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+
+	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+}
+
+static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+	unsigned long rt_runtime;
+	int rc;
+
+	sscanf(buf, "%lu", &rt_runtime);
+
+	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
+
+	return (rc ? rc : size);
+}
+
+static struct kobj_attribute cpu_rt_runtime_attr =
+	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
 	&cpu_share_attr.attr,
+	&cpu_rt_runtime_attr.attr,
 	NULL
 };
 
-- 
cgit v1.2.3


From 052f1dc7eb02300b05170ae341ccd03b76207778 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:40 +0100
Subject: sched: rt-group: make rt groups scheduling configurable

Make the rt group scheduler compile time configurable.
Keep it experimental for now.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c    | 148 ++++++++++++++++++++++++++++++++++++++----------------
 kernel/sched_rt.c |  12 ++---
 kernel/user.c     |  22 +++++---
 3 files changed, 126 insertions(+), 56 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 85a5fbff2b0..5edc549edae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@ struct rt_prio_array {
 	struct list_head queue[MAX_RT_PRIO];
 };
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
 #include <linux/cgroup.h>
 
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups);
 
 /* task group related information */
 struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 	struct cgroup_subsys_state css;
 #endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 
-	struct sched_rt_entity **rt_se;
-	struct rt_rq **rt_rq;
-
-	u64 rt_runtime;
-
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
 	 * is allocated to the group. The more shares a group has, the more is
@@ -213,24 +210,36 @@ struct task_group {
 	 *
 	 */
 	unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	u64 rt_runtime;
+#endif
 
 	struct rcu_head rcu;
 	struct list_head list;
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* Default task group's sched entity on each cpu */
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-
 static struct sched_entity *init_sched_entity_p[NR_CPUS];
 static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 
 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -240,6 +249,7 @@ static DEFINE_SPINLOCK(task_group_lock);
 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 /* kernel thread that runs rebalance_shares() periodically */
 static struct task_struct *lb_monitor_task;
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused);
 
 static void set_se_shares(struct sched_entity *se, unsigned long shares);
 
+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
+#else
+# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
+#endif
+
+#define MIN_GROUP_SHARES	2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
 struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	.se	= init_sched_entity_p,
 	.cfs_rq = init_cfs_rq_p,
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	.rt_se	= init_sched_rt_entity_p,
 	.rt_rq	= init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
-#else
-# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 #endif
-
-#define MIN_GROUP_SHARES	2
-
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+};
 
 /* return group to which a task belongs */
 static inline struct task_group *task_group(struct task_struct *p)
 {
 	struct task_group *tg;
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
 #else
@@ -288,11 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p)
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 	p->se.parent = task_group(p)->se[cpu];
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
 }
 
 static inline void lock_doms_cur(void)
@@ -311,7 +330,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }
 
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif	/* CONFIG_GROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -351,7 +370,7 @@ struct cfs_rq {
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
@@ -361,7 +380,7 @@ struct rt_rq {
 	int rt_throttled;
 	u64 rt_time;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	unsigned long rt_nr_boosted;
 
 	struct rq *rq;
@@ -437,6 +456,8 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
 
@@ -7104,7 +7125,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	/* delimiter for bitsearch: */
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
@@ -7115,7 +7136,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 #endif
@@ -7139,7 +7160,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
 	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
 	se->parent = NULL;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
 		struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
 		int cpu, int add)
@@ -7168,7 +7191,7 @@ void __init sched_init(void)
 	init_defrootdomain();
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 #endif
 
@@ -7189,6 +7212,8 @@ void __init sched_init(void)
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		init_task_group.rt_runtime =
 			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -7381,9 +7406,9 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
  * to reflect load distribution across cpus.
@@ -7539,20 +7564,28 @@ static void free_sched_group(struct task_group *tg)
 	int i;
 
 	for_each_possible_cpu(i) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
 		if (tg->se)
 			kfree(tg->se[i]);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
 		if (tg->rt_se)
 			kfree(tg->rt_se[i]);
+#endif
 	}
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	kfree(tg->cfs_rq);
 	kfree(tg->se);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	kfree(tg->rt_rq);
 	kfree(tg->rt_se);
+#endif
 	kfree(tg);
 }
 
@@ -7560,10 +7593,14 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(void)
 {
 	struct task_group *tg;
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct rt_rq *rt_rq;
 	struct sched_rt_entity *rt_se;
+#endif
 	struct rq *rq;
 	unsigned long flags;
 	int i;
@@ -7572,12 +7609,18 @@ struct task_group *sched_create_group(void)
 	if (!tg)
 		return ERR_PTR(-ENOMEM);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
 	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
+
+	tg->shares = NICE_0_LOAD;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
 	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->rt_rq)
 		goto err;
@@ -7585,12 +7628,13 @@ struct task_group *sched_create_group(void)
 	if (!tg->rt_se)
 		goto err;
 
-	tg->shares = NICE_0_LOAD;
 	tg->rt_runtime = 0;
+#endif
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!cfs_rq)
@@ -7601,6 +7645,10 @@ struct task_group *sched_create_group(void)
 		if (!se)
 			goto err;
 
+		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
 		rt_rq = kmalloc_node(sizeof(struct rt_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!rt_rq)
@@ -7611,17 +7659,21 @@ struct task_group *sched_create_group(void)
 		if (!rt_se)
 			goto err;
 
-		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
 		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
+#endif
 	}
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		cfs_rq = tg->cfs_rq[i];
 		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		rt_rq = tg->rt_rq[i];
 		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+#endif
 	}
 	list_add_rcu(&tg->list, &task_groups);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7643,23 +7695,21 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
-	struct cfs_rq *cfs_rq = NULL;
-	struct rt_rq *rt_rq = NULL;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-		rt_rq = tg->rt_rq[i];
-		list_del_rcu(&rt_rq->leaf_rt_rq_list);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
+#endif
 	}
 	list_del_rcu(&tg->list);
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
-	BUG_ON(!cfs_rq);
-
 	/* wait for possible concurrent references to cfs_rqs complete */
 	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
@@ -7699,6 +7749,7 @@ void sched_move_task(struct task_struct *tsk)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
@@ -7786,7 +7837,9 @@ unsigned long sched_group_shares(struct task_group *tg)
 {
 	return tg->shares;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 /*
  * Ensure that the real time constraints are schedulable.
  */
@@ -7858,9 +7911,10 @@ long sched_group_rt_runtime(struct task_group *tg)
 	do_div(rt_runtime_us, NSEC_PER_USEC);
 	return rt_runtime_us;
 }
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif
+#endif	/* CONFIG_GROUP_SCHED */
 
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 
 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7920,6 +7974,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	sched_move_task(tsk);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
 {
@@ -7932,7 +7987,9 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 
 	return (u64) tg->shares;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
@@ -7977,18 +8034,23 @@ static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
+#endif
 
 static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	{
 		.name = "shares",
 		.read_uint = cpu_shares_read_uint,
 		.write_uint = cpu_shares_write_uint,
 	},
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	{
 		.name = "rt_runtime_us",
 		.read = cpu_rt_runtime_read,
 		.write = cpu_rt_runtime_write,
 	},
+#endif
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -8007,7 +8069,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.early_init	= 1,
 };
 
-#endif	/* CONFIG_FAIR_CGROUP_SCHED */
+#endif	/* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_CPUACCT
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 35825b28e42..f54792b175b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,7 +55,7 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 	return !list_empty(&rt_se->run_list);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -177,7 +177,7 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 	if (rt_rq)
@@ -269,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
 		rt_rq->highest_prio = rt_se_prio(rt_se);
 #endif
@@ -281,7 +281,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted++;
 #endif
@@ -293,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_rq->rt_nr_running) {
 		struct rt_prio_array *array;
 
@@ -315,7 +315,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted--;
 
diff --git a/kernel/user.c b/kernel/user.c
index 9f6d471bfd0..7132022a040 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -57,7 +57,7 @@ struct user_struct root_user = {
 	.uid_keyring	= &root_user_keyring,
 	.session_keyring = &root_session_keyring,
 #endif
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	.tg		= &init_task_group,
 #endif
 };
@@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
 	return NULL;
 }
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
 {
@@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p)
 	sched_move_task(p);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED */
+#else	/* CONFIG_USER_SCHED */
 
 static void sched_destroy_user(struct user_struct *up) { }
 static int sched_create_user(struct user_struct *up) { return 0; }
 static void sched_switch_user(struct task_struct *p) { }
 
-#endif	/* CONFIG_FAIR_USER_SCHED */
+#endif	/* CONFIG_USER_SCHED */
 
-#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
@@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void)
 }
 
 /* uid directory attributes */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static ssize_t cpu_shares_show(struct kobject *kobj,
 			       struct kobj_attribute *attr,
 			       char *buf)
@@ -163,7 +164,9 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
 
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
 				   struct kobj_attribute *attr,
 				   char *buf)
@@ -190,11 +193,16 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
 
 static struct kobj_attribute cpu_rt_runtime_attr =
 	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+#endif
 
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	&cpu_share_attr.attr,
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	&cpu_rt_runtime_attr.attr,
+#endif
 	NULL
 };
 
@@ -297,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
 	schedule_work(&up->work);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
+#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
@@ -401,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
-			/* This case is not possible when CONFIG_FAIR_USER_SCHED
+			/* This case is not possible when CONFIG_USER_SCHED
 			 * is defined, since we serialize alloc_uid() using
 			 * uids_mutex. Hence no need to call
 			 * sched_destroy_user() or remove_user_sysfs_dir().
-- 
cgit v1.2.3


From bccbe08a60973c873e6af6fdb9ec11ffb1a6e4de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:40 +0100
Subject: sched: rt-group: clean up the ifdeffery

Clean up some of the excessive ifdeffery introduces in the last patch.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 210 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 139 insertions(+), 71 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 5edc549edae..d2f4398c5e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7559,57 +7559,29 @@ static int load_balance_monitor(void *unused)
 }
 #endif	/* CONFIG_SMP */
 
-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
 	for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
 		if (tg->se)
 			kfree(tg->se[i]);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-		if (tg->rt_rq)
-			kfree(tg->rt_rq[i]);
-		if (tg->rt_se)
-			kfree(tg->rt_se[i]);
-#endif
 	}
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	kfree(tg->cfs_rq);
 	kfree(tg->se);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-	kfree(tg->rt_rq);
-	kfree(tg->rt_se);
-#endif
-	kfree(tg);
 }
 
-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
 {
-	struct task_group *tg;
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se;
-#endif
 	struct rq *rq;
-	unsigned long flags;
 	int i;
 
-	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-	if (!tg)
-		return ERR_PTR(-ENOMEM);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
@@ -7618,23 +7590,10 @@ struct task_group *sched_create_group(void)
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
-	if (!tg->rt_rq)
-		goto err;
-	tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
-	if (!tg->rt_se)
-		goto err;
-
-	tg->rt_runtime = 0;
-#endif
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!cfs_rq)
@@ -7646,9 +7605,78 @@ struct task_group *sched_create_group(void)
 			goto err;
 
 		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+	}
+
+	return 1;
+
+ err:
+	return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+	list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+			&cpu_rq(cpu)->leaf_cfs_rq_list);
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+	list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}
+#else
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{
+	return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+}
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
+	struct rq *rq;
+	int i;
+
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	tg->rt_runtime = 0;
+
+	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		rt_rq = kmalloc_node(sizeof(struct rt_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!rt_rq)
@@ -7660,20 +7688,71 @@ struct task_group *sched_create_group(void)
 			goto err;
 
 		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
-#endif
 	}
 
+	return 1;
+
+ err:
+	return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+	list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+			&cpu_rq(cpu)->leaf_rt_rq_list);
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+	list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}
+#else
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{
+	return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+static void free_sched_group(struct task_group *tg)
+{
+	free_fair_sched_group(tg);
+	free_rt_sched_group(tg);
+	kfree(tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+	struct task_group *tg;
+	unsigned long flags;
+	int i;
+
+	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+	if (!tg)
+		return ERR_PTR(-ENOMEM);
+
+	if (!alloc_fair_sched_group(tg))
+		goto err;
+
+	if (!alloc_rt_sched_group(tg))
+		goto err;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		cfs_rq = tg->cfs_rq[i];
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-		rt_rq = tg->rt_rq[i];
-		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
-#endif
+		register_fair_sched_group(tg, i);
+		register_rt_sched_group(tg, i);
 	}
 	list_add_rcu(&tg->list, &task_groups);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7700,12 +7779,8 @@ void sched_destroy_group(struct task_group *tg)
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-		list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
-#endif
+		unregister_fair_sched_group(tg, i);
+		unregister_rt_sched_group(tg, i);
 	}
 	list_del_rcu(&tg->list);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7780,8 +7855,6 @@ static DEFINE_MUTEX(shares_mutex);
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
-	struct cfs_rq *cfs_rq;
-	struct rq *rq;
 	unsigned long flags;
 
 	mutex_lock(&shares_mutex);
@@ -7797,10 +7870,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
 	 */
 	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-	}
+	for_each_possible_cpu(i)
+		unregister_fair_sched_group(tg, i);
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	/* wait for any ongoing reference to this group to finish */
@@ -7822,11 +7893,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * each cpu's rq->leaf_cfs_rq_list.
 	 */
 	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-		cfs_rq = tg->cfs_rq[i];
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-	}
+	for_each_possible_cpu(i)
+		register_fair_sched_group(tg, i);
 	spin_unlock_irqrestore(&task_group_lock, flags);
 done:
 	mutex_unlock(&shares_mutex);
-- 
cgit v1.2.3


From b68aa2300cabeb96801369a4bb37a4f19f59ed84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:40 +0100
Subject: sched: rt-group: refure unrunnable tasks

Refuse to accept or create RT tasks in groups that can't run them.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index d2f4398c5e6..f28f19e65b5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4584,6 +4584,15 @@ recheck:
 			return -EPERM;
 	}
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Do not allow realtime tasks into groups that have no runtime
+	 * assigned.
+	 */
+	if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+		return -EPERM;
+#endif
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -8028,9 +8037,15 @@ static int
 cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+		return -EINVAL;
+#else
 	/* We don't support RT-tasks being in separate groups */
 	if (tsk->sched_class != &fair_sched_class)
 		return -EINVAL;
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3