From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Wed, 20 Oct 2010 16:01:12 -0700
Subject: sched: Make sched_param argument static in sched_setscheduler() callers

Andrew Morton pointed out almost all sched_setscheduler() callers are
using fixed parameters and can be converted to static. It reduces
runtime memory use a little.

Signed-off-by: KOSAKI Motohiro
Reported-by: Andrew Morton
Acked-by: James Morris
Cc: Ingo Molnar
Cc: Steven Rostedt
Signed-off-by: Andrew Morton
Signed-off-by: Thomas Gleixner
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0383601a927c..849c8670583d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1942,9 +1942,10 @@ extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern int sched_setscheduler(struct task_struct *, int,
+			      const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
-				      struct sched_param *);
+				      const struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
--
cgit v1.2.3
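
[ Illustration, not part of the patch: a minimal sketch of the caller
  pattern the const-qualified prototype enables. The helper name and
  priority value are hypothetical. ]

  /* The fixed parameter block can now be static const (emitted once,
   * in rodata) instead of being rebuilt on every caller's stack. */
  static int make_task_fifo(struct task_struct *p)
  {
          static const struct sched_param param = { .sched_priority = 1 };

          return sched_setscheduler(p, SCHED_FIFO, &param);
  }
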
From 48c5ccae88dcd989d9de507e8510313c6cbd352b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sat, 13 Nov 2010 19:32:29 +0100
Subject: sched: Simplify cpu-hot-unplug task migration

While discussing the need for sched_idle_next(), Oleg remarked that
since try_to_wake_up() ensures sleeping tasks will end up running on a
sane cpu, we can do away with migrate_live_tasks().

If we then extend the existing hack of migrating current from CPU_DYING
to migrating the full rq worth of tasks from CPU_DYING, the need for the
sched_idle_next() abomination disappears as well, since idle will be the
only possible thread left after the migration thread stops.

This greatly simplifies the hot-unplug task migration path, as can be
seen from the resulting code reduction (and about half the new lines are
comments).

Suggested-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra
LKML-Reference: <1289851597.2109.547.camel@laptop>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cd70cf91fde..29d953abb5ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1871,14 +1871,11 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
 #endif
 
-extern void sched_idle_next(void);
-
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
 extern void wake_up_idle_cpu(int cpu);
 #else
--
cgit v1.2.3

From 2069dd75c7d0f49355939e5586daf5a9ab216db7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Nov 2010 15:47:00 -0800
Subject: sched: Rewrite tg_shares_up()

By tracking a per-cpu load-avg for each cfs_rq and folding it into a
global task_group load on each tick, we can rework tg_shares_up() to be
strictly per-cpu.

This should improve cpu-cgroup performance for SMP systems
significantly.

[ Paul: changed to use queueing cfs_rq + bug fixes ]

Signed-off-by: Paul Turner
Signed-off-by: Peter Zijlstra
LKML-Reference: <20101115234937.580480400@google.com>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29d953abb5ad..8abb8aa59664 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
--
cgit v1.2.3

From a7a4f8a752ec734b2eab904fc863d5dc873de338 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Mon, 15 Nov 2010 15:47:06 -0800
Subject: sched: Add sysctl_sched_shares_window

Introduce a new sysctl for the shares window and disambiguate it from
sched_time_avg. A 10ms window appears to be a good compromise between
accuracy and performance.

Signed-off-by: Paul Turner
Signed-off-by: Peter Zijlstra
LKML-Reference: <20101115234938.112173964@google.com>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8abb8aa59664..840f1277492f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1900,6 +1900,7 @@ extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
--
cgit v1.2.3
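
[ Illustration, not part of the patch: a hedged userspace sketch for
  inspecting the new tunable. The patch only declares the variable; the
  sysctl entry itself is registered in kernel/sysctl.c, outside this
  include/linux-filtered view, so the /proc path below is an assumption,
  as is the nanosecond unit (inferred from the 10ms default mentioned
  above). ]

  #include <stdio.h>

  int main(void)
  {
          unsigned int window;
          /* Assumed procname -- verify against kernel/sysctl.c. */
          FILE *f = fopen("/proc/sys/kernel/sched_shares_window", "r");

          if (!f) {
                  perror("sched_shares_window");
                  return 1;
          }
          if (fscanf(f, "%u", &window) == 1)
                  printf("shares window: %u\n", window);
          fclose(f);
          return 0;
  }
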
From 335d7afbfb71faac833734a94240c1e07cf0ead8 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer
Date: Mon, 22 Nov 2010 15:47:36 +0100
Subject: mutexes, sched: Introduce arch_mutex_cpu_relax()

The spinning mutex implementation uses cpu_relax() in busy loops as a
compiler barrier. Depending on the architecture, cpu_relax() may do more
than needed in these specific mutex spin loops. On System z we also give
up the time slice of the virtual cpu in cpu_relax(), which prevents
effective spinning on the mutex.

This patch replaces cpu_relax() in the spinning mutex code with
arch_mutex_cpu_relax(), which can be defined by each architecture that
selects HAVE_ARCH_MUTEX_CPU_RELAX. The default is still cpu_relax(), so
this patch should not affect architectures other than System z for now.

Signed-off-by: Gerald Schaefer
Signed-off-by: Peter Zijlstra
LKML-Reference: <1290437256.7455.4.camel@thinkpad>
Signed-off-by: Ingo Molnar
---
 include/linux/mutex.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f363bc8fdc74..94b48bd40dd7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
+#define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 #endif
--
cgit v1.2.3
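
[ Illustration, not part of the hunk above: a sketch of the opt-in side.
  The architecture hunks are filtered out of this include/linux-only
  view; an architecture selects HAVE_ARCH_MUTEX_CPU_RELAX in its Kconfig
  and provides its own definition, which for System z amounts to a plain
  compiler barrier. ]

  /* e.g. arch/s390/include/asm/mutex.h (sketch): spin on a pure
   * compiler barrier instead of cpu_relax(), so the busy loop no
   * longer yields the virtual cpu's timeslice. */
  #define arch_mutex_cpu_relax()  barrier()
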
"signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -576,6 +578,9 @@ struct signal_struct { struct tty_struct *tty; /* NULL if no tty */ +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. @@ -1927,6 +1932,24 @@ int sched_rt_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_compat_yield; +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; + +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +#endif + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); -- cgit v1.2.3 From 806c09a7db457be3758e14b1f152761135d89af5 Mon Sep 17 00:00:00 2001 From: Dario Faggioli Date: Tue, 30 Nov 2010 19:51:33 +0100 Subject: sched: Make pushable_tasks CONFIG_SMP dependant As noted by Peter Zijlstra at https://lkml.org/lkml/2010/11/10/391 (while reviewing other stuff, though), tracking pushable tasks only makes sense on SMP systems. 
From 806c09a7db457be3758e14b1f152761135d89af5 Mon Sep 17 00:00:00 2001
From: Dario Faggioli
Date: Tue, 30 Nov 2010 19:51:33 +0100
Subject: sched: Make pushable_tasks CONFIG_SMP dependent

As noted by Peter Zijlstra at https://lkml.org/lkml/2010/11/10/391
(while reviewing other stuff, though), tracking pushable tasks only
makes sense on SMP systems.

Signed-off-by: Dario Faggioli
Acked-by: Steven Rostedt
Acked-by: Gregory Haskins
Signed-off-by: Peter Zijlstra
LKML-Reference: <1291143093.2697.298.camel@Palantir>
Signed-off-by: Ingo Molnar
---
 include/linux/init_task.h | 9 ++++++++-
 include/linux/sched.h     | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1f8c06ce0fa6..6ed8812bfe2d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,13 @@
 #include
 #include
 
+#ifdef CONFIG_SMP
+# define INIT_PUSHABLE_TASKS(tsk)				\
+	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
+#else
+# define INIT_PUSHABLE_TASKS(tsk)
+#endif
+
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
@@ -137,7 +144,7 @@ extern struct cred init_cred;
 	.nr_cpus_allowed = NR_CPUS,				\
 	},							\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),		\
-	.pushable_tasks	= PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
+	INIT_PUSHABLE_TASKS(tsk)				\
 	.ptraced	= LIST_HEAD_INIT(tsk.ptraced),		\
 	.ptrace_entry	= LIST_HEAD_INIT(tsk.ptrace_entry),	\
 	.real_parent	= &tsk,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c2d46da486e..4f92a239c14d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1240,7 +1240,9 @@ struct task_struct {
 #endif
 
 	struct list_head tasks;
+#ifdef CONFIG_SMP
 	struct plist_node pushable_tasks;
+#endif
 
 	struct mm_struct *mm, *active_mm;
 #if defined(SPLIT_RSS_COUNTING)
--
cgit v1.2.3

From 6bf4123760a5aece6e4829ce90b70b6ffd751d65 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Wed, 5 Jan 2011 12:50:16 +1100
Subject: sched: Change wait_for_completion_*_timeout() to return a signed long

wait_for_completion_*_timeout() can return:

   0: if the wait timed out
 -ve: if the wait was interrupted
 +ve: if the completion was completed.

As they currently return an 'unsigned long', the last two cases are not
easily distinguished, which can easily result in buggy code, as is the
case for the recently added wait_for_completion_interruptible_timeout()
call in net/sunrpc/cache.c

So change them both to return 'long'. As MAX_SCHEDULE_TIMEOUT is
LONG_MAX, a large +ve return value should never overflow.

Signed-off-by: NeilBrown
Cc: Peter Zijlstra
Cc: J. Bruce Fields
Cc: Andrew Morton
Cc: Linus Torvalds
LKML-Reference: <20110105125016.64ccab0e@notabene.brown>
Signed-off-by: Ingo Molnar
---
 include/linux/completion.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 36d57f74cd01..51494e6b5548 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
 						   unsigned long timeout);
-extern unsigned long wait_for_completion_interruptible_timeout(
-	struct completion *x, unsigned long timeout);
-extern unsigned long wait_for_completion_killable_timeout(
-	struct completion *x, unsigned long timeout);
+extern long wait_for_completion_interruptible_timeout(
+	struct completion *x, unsigned long timeout);
+extern long wait_for_completion_killable_timeout(
+	struct completion *x, unsigned long timeout);
 extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
--
cgit v1.2.3
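
[ Illustration, not part of the patch: how a caller distinguishes the
  three outcomes now that the return type is signed. The helper is
  hypothetical; the case meanings follow the changelog above. ]

  #include <linux/completion.h>
  #include <linux/errno.h>
  #include <linux/jiffies.h>

  static int wait_for_thing(struct completion *done)
  {
          long ret = wait_for_completion_interruptible_timeout(done, HZ);

          if (ret < 0)            /* interrupted by a signal */
                  return ret;
          if (ret == 0)           /* timed out */
                  return -ETIMEDOUT;
          return 0;               /* completed; ret = jiffies remaining */
  }
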