From dd12f48d4e8774415b528d3991ae47c28f26e1ac Mon Sep 17 00:00:00 2001 From: "Bhavesh P. Davda" Date: Wed, 17 Aug 2005 12:26:33 -0600 Subject: [PATCH] NPTL signal delivery deadlock fix This bug is quite subtle and only happens in a very interesting situation where a real-time threaded process is in the middle of a coredump when someone whacks it with a SIGKILL. However, this deadlock leaves the system pretty hosed and you have to reboot to recover. Not good for real-time priority-preemption applications like our telephony application, with 90+ real-time (SCHED_FIFO and SCHED_RR) processes, many of them multi-threaded, interacting with each other for high volume call processing. Acked-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index ca1186eef93..d282fea8113 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -692,7 +692,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) { struct task_struct *t; - if (p->flags & SIGNAL_GROUP_EXIT) + if (p->signal->flags & SIGNAL_GROUP_EXIT) /* * The process is in the middle of dying already. */ -- cgit v1.2.3 From 024f474795af7a0d41bd6d60061d78bd66d13f56 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 18 Aug 2005 11:24:19 -0700 Subject: [PATCH] Make RLIMIT_NICE ranges consistent with getpriority(2) As suggested by Michael Kerrisk , make RLIMIT_NICE consistent with getpriority before it becomes available in released glibc. Signed-off-by: Matt Mackall Acked-by: Ingo Molnar Acked-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index a646e4f36c4..5f889d0cbfc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3378,8 +3378,8 @@ EXPORT_SYMBOL(set_user_nice); */ int can_nice(const task_t *p, const int nice) { - /* convert nice value [19,-20] to rlimit style value [0,39] */ - int nice_rlim = 19 - nice; + /* convert nice value [19,-20] to rlimit style value [1,40] */ + int nice_rlim = 20 - nice; return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || capable(CAP_SYS_NICE)); } -- cgit v1.2.3 From 4c5640cb5f5a6fd780d99397eca028b575cb1206 Mon Sep 17 00:00:00 2001 From: David Meybohm Date: Mon, 22 Aug 2005 13:11:08 -0700 Subject: [PATCH] preempt race in getppid With CONFIG_PREEMPT && !CONFIG_SMP, it's possible for sys_getppid to return a bogus value if the parent's task_struct gets reallocated after current->group_leader->real_parent is read: asmlinkage long sys_getppid(void) { int pid; struct task_struct *me = current; struct task_struct *parent; parent = me->group_leader->real_parent; RACE HERE => for (;;) { pid = parent->tgid; #ifdef CONFIG_SMP { struct task_struct *old = parent; /* * Make sure we read the pid before re-reading the * parent pointer: */ smp_rmb(); parent = me->group_leader->real_parent; if (old != parent) continue; } #endif break; } return pid; } If the process gets preempted at the indicated point, the parent process can go ahead and call exit() and then get wait()'d on to reap its task_struct. When the preempted process gets resumed, it will not do any further checks of the parent pointer on !CONFIG_SMP: it will read the bad pid and return. So, the same algorithm used when SMP is enabled should be used when preempt is enabled, which will recheck ->real_parent in this case. Signed-off-by: David Meybohm Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index f2a11887a72..5377f40723f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1023,7 +1023,7 @@ asmlinkage long sys_getppid(void) parent = me->group_leader->real_parent; for (;;) { pid = parent->tgid; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) { struct task_struct *old = parent; -- cgit v1.2.3 From d10689b68aff7b48e3de1a3f7fcd6567bd2905af Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 23 Aug 2005 01:04:27 -0700 Subject: [PATCH] cpu_exclusive sched domains on partial nodes temp fix This keeps the kernel/cpuset.c routine update_cpu_domains() from invoking the sched.c routine partition_sched_domains() if the cpuset in question doesn't fall on node boundaries. I have boot tested this on an SN2, and with the help of a couple of ad hoc printk's, determined that it does indeed avoid calling the partition_sched_domains() routine on partial nodes. I did not directly verify that this avoids setting up bogus sched domains or avoids the oops that Hawkes saw. This patch imposes a silent artificial constraint on which cpusets can be used to define dynamic sched domains. This patch should allow proceeding with this new feature in 2.6.13 for the configurations in which it is useful (node alligned sched domains) while avoiding trying to setup sched domains in the less useful cases that can cause the kernel corruption and oops. Signed-off-by: Paul Jackson Acked-by: Ingo Molnar Acked-by: Dinakar Guniguntala Acked-by: John Hawkes Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 21a4e3b2cbd..e0d296c5b30 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -635,6 +635,23 @@ static void update_cpu_domains(struct cpuset *cur) if (par == NULL || cpus_empty(cur->cpus_allowed)) return; + /* + * Hack to avoid 2.6.13 partial node dynamic sched domain bug. + * Require the 'cpu_exclusive' cpuset to include all (or none) + * of the CPUs on each node, or return w/o changing sched domains. + * Remove this hack when dynamic sched domains fixed. + */ + { + int i, j; + + for_each_cpu_mask(i, cur->cpus_allowed) { + for_each_cpu_mask(j, node_to_cpumask(cpu_to_node(i))) { + if (!cpu_isset(j, cur->cpus_allowed)) + return; + } + } + } + /* * Get all cpus from parent's cpus_allowed not part of exclusive * children -- cgit v1.2.3 From 3725822f7c7134249addcd4549aff086950c8090 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 24 Aug 2005 04:15:10 -0700 Subject: [PATCH] cpu_exclusive sched domains build fix As reported by Paul Mackerras , the previous patch "cpu_exclusive sched domains fix" broke the ppc64 build with CONFIC_CPUSET, yielding error messages: kernel/cpuset.c: In function 'update_cpu_domains': kernel/cpuset.c:648: error: invalid lvalue in unary '&' kernel/cpuset.c:648: error: invalid lvalue in unary '&' On some arch's, the node_to_cpumask() is a function, returning a cpumask_t. But the for_each_cpu_mask() requires an lvalue mask. The following patch fixes this build failure by making a copy of the cpumask_t on the stack. Signed-off-by: Paul Jackson Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e0d296c5b30..d7f4d0c9573 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -645,7 +645,9 @@ static void update_cpu_domains(struct cpuset *cur) int i, j; for_each_cpu_mask(i, cur->cpus_allowed) { - for_each_cpu_mask(j, node_to_cpumask(cpu_to_node(i))) { + cpumask_t mask = node_to_cpumask(cpu_to_node(i)); + + for_each_cpu_mask(j, mask) { if (!cpu_isset(j, cur->cpus_allowed)) return; } -- cgit v1.2.3 From ca2f3daf779f5e89d14e9783fcfd7920842df9e9 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Thu, 25 Aug 2005 12:47:50 -0700 Subject: [PATCH] undo partial cpu_exclusive sched domain disabling The partial disabling of Dinakar's new facility to allow cpu_exclusive cpusets to define dynamic sched domains doesn't go far enough. At the suggestion of Nick Piggin and Dinakar, let us instead totally disable this facility for 2.6.13, in order to avoid problems first reported by John Hawkes (corrupt sched data structures and kernel oops). This patch removes the partial disabling code in 2.6.13-rc7, in anticipation of the next patch, which will totally disable it instead. Signed-off-by: Paul Jackson Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d7f4d0c9573..21a4e3b2cbd 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -635,25 +635,6 @@ static void update_cpu_domains(struct cpuset *cur) if (par == NULL || cpus_empty(cur->cpus_allowed)) return; - /* - * Hack to avoid 2.6.13 partial node dynamic sched domain bug. - * Require the 'cpu_exclusive' cpuset to include all (or none) - * of the CPUs on each node, or return w/o changing sched domains. - * Remove this hack when dynamic sched domains fixed. - */ - { - int i, j; - - for_each_cpu_mask(i, cur->cpus_allowed) { - cpumask_t mask = node_to_cpumask(cpu_to_node(i)); - - for_each_cpu_mask(j, mask) { - if (!cpu_isset(j, cur->cpus_allowed)) - return; - } - } - } - /* * Get all cpus from parent's cpus_allowed not part of exclusive * children -- cgit v1.2.3 From 212d6d2237f60bc28c1518f8abf9d3ed6c17574a Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Thu, 25 Aug 2005 12:47:56 -0700 Subject: [PATCH] completely disable cpu_exclusive sched domain At the suggestion of Nick Piggin and Dinakar, totally disable the facility to allow cpu_exclusive cpusets to define dynamic sched domains in Linux 2.6.13, in order to avoid problems first reported by John Hawkes (corrupt sched data structures and kernel oops). This has been built for ppc64, i386, ia64, x86_64, sparc, alpha. It has been built, booted and tested for cpuset functionality on an SN2 (ia64). Dinakar or Nick - could you verify that it for sure does avoid the problems Hawkes reported. Hawkes is out of town, and I don't have the recipe to reproduce what he found. Signed-off-by: Paul Jackson Acked-by: Nick Piggin Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 21a4e3b2cbd..8ab1b4e518b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -627,6 +627,14 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) * Call with cpuset_sem held. May nest a call to the * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. */ + +/* + * Hack to avoid 2.6.13 partial node dynamic sched domain bug. + * Disable letting 'cpu_exclusive' cpusets define dynamic sched + * domains, until the sched domain can handle partial nodes. + * Remove this #if hackery when sched domains fixed. + */ +#if 0 static void update_cpu_domains(struct cpuset *cur) { struct cpuset *c, *par = cur->parent; @@ -667,6 +675,11 @@ static void update_cpu_domains(struct cpuset *cur) partition_sched_domains(&pspan, &cspan); unlock_cpu_hotplug(); } +#else +static void update_cpu_domains(struct cpuset *cur) +{ +} +#endif static int update_cpumask(struct cpuset *cs, char *buf) { -- cgit v1.2.3