aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cgroup.c23
-rw-r--r--kernel/cpuset.c20
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/freezer.c2
-rw-r--r--kernel/mutex.c4
-rw-r--r--kernel/nsproxy.c27
-rw-r--r--kernel/pid_namespace.c4
-rw-r--r--kernel/power/process.c11
-rw-r--r--kernel/power/qos.c20
-rw-r--r--kernel/printk/Makefile2
-rw-r--r--kernel/printk/braille.c49
-rw-r--r--kernel/printk/braille.h48
-rw-r--r--kernel/printk/console_cmdline.h14
-rw-r--r--kernel/printk/printk.c (renamed from kernel/printk.c)183
-rw-r--r--kernel/ptrace.c1
-rw-r--r--kernel/sched/core.c96
-rw-r--r--kernel/sched/cpupri.c4
-rw-r--r--kernel/sched/fair.c14
-rw-r--r--kernel/sysctl.c6
-rw-r--r--kernel/time/sched_clock.c2
-rw-r--r--kernel/time/tick-sched.c14
-rw-r--r--kernel/time/timer_list.c41
-rw-r--r--kernel/trace/ftrace.c87
-rw-r--r--kernel/trace/trace.c27
-rw-r--r--kernel/trace/trace_events.c200
-rw-r--r--kernel/trace/trace_events_filter.c17
-rw-r--r--kernel/trace/trace_kprobe.c21
-rw-r--r--kernel/trace/trace_uprobe.c51
-rw-r--r--kernel/user_namespace.c17
-rw-r--r--kernel/wait.c3
-rw-r--r--kernel/workqueue.c53
32 files changed, 731 insertions, 344 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 470839d1a30..35ef1185e35 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
-obj-y = fork.o exec_domain.o panic.o printk.o \
+obj-y = fork.o exec_domain.o panic.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
@@ -24,6 +24,7 @@ endif
obj-y += sched/
obj-y += power/
+obj-y += printk/
obj-y += cpu/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 789ec4683db..e91963302c0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4335,8 +4335,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
}
err = percpu_ref_init(&css->refcnt, css_release);
- if (err)
+ if (err) {
+ ss->css_free(cgrp);
goto err_free_all;
+ }
init_cgroup_css(css, ss, cgrp);
@@ -4478,6 +4480,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
struct dentry *d = cgrp->dentry;
struct cgroup_event *event, *tmp;
struct cgroup_subsys *ss;
+ struct cgroup *child;
bool empty;
lockdep_assert_held(&d->d_inode->i_mutex);
@@ -4488,12 +4491,28 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
* @cgrp from being removed while __put_css_set() is in progress.
*/
read_lock(&css_set_lock);
- empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children);
+ empty = list_empty(&cgrp->cset_links);
read_unlock(&css_set_lock);
if (!empty)
return -EBUSY;
/*
+ * Make sure there are no live children. We can't test ->children
+ * emptiness as dead children linger on it while being destroyed;
+ * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
+ */
+ empty = true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(child, &cgrp->children, sibling) {
+ empty = cgroup_is_dead(child);
+ if (!empty)
+ break;
+ }
+ rcu_read_unlock();
+ if (!empty)
+ return -EBUSY;
+
+ /*
* Block new css_tryget() by killing css refcnts. cgroup core
* guarantees that, by the time ->css_offline() is invoked, no new
* css reference will be given out via css_tryget(). We can't
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e5657788fed..ea1966db34f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -475,13 +475,17 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
/*
* Cpusets with tasks - existing or newly being attached - can't
- * have empty cpus_allowed or mems_allowed.
+ * be changed to have empty cpus_allowed or mems_allowed.
*/
ret = -ENOSPC;
- if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
- (cpumask_empty(trial->cpus_allowed) &&
- nodes_empty(trial->mems_allowed)))
- goto out;
+ if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) {
+ if (!cpumask_empty(cur->cpus_allowed) &&
+ cpumask_empty(trial->cpus_allowed))
+ goto out;
+ if (!nodes_empty(cur->mems_allowed) &&
+ nodes_empty(trial->mems_allowed))
+ goto out;
+ }
ret = 0;
out:
@@ -1608,11 +1612,13 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- int retval = -ENODEV;
+ int retval = 0;
mutex_lock(&cpuset_mutex);
- if (!is_cpuset_online(cs))
+ if (!is_cpuset_online(cs)) {
+ retval = -ENODEV;
goto out_unlock;
+ }
switch (type) {
case FILE_CPU_EXCLUSIVE:
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb8a96..bf46287c91a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1177,7 +1177,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* don't allow the creation of threads.
*/
if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
- (task_active_pid_ns(current) != current->nsproxy->pid_ns))
+ (task_active_pid_ns(current) !=
+ current->nsproxy->pid_ns_for_children))
return ERR_PTR(-EINVAL);
retval = security_task_create(clone_flags);
@@ -1351,7 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (pid != &init_struct_pid) {
retval = -ENOMEM;
- pid = alloc_pid(p->nsproxy->pid_ns);
+ pid = alloc_pid(p->nsproxy->pid_ns_for_children);
if (!pid)
goto bad_fork_cleanup_io;
}
@@ -1679,6 +1680,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
+#elif defined(CONFIG_CLONE_BACKWARDS3)
+SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
+ int, stack_size,
+ int __user *, parent_tidptr,
+ int __user *, child_tidptr,
+ int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 8b2afc1c9df..b462fa19751 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(freezer_lock);
*/
bool freezing_slow_path(struct task_struct *p)
{
- if (p->flags & PF_NOFREEZE)
+ if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
return false;
if (pm_nosig_freezing || cgroup_freezing(p))
diff --git a/kernel/mutex.c b/kernel/mutex.c
index ff05f4bd86e..a52ee7bb830 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -686,7 +686,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx);
- if (!ret && ctx->acquired > 0)
+ if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
@@ -702,7 +702,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
0, &ctx->dep_map, _RET_IP_, ctx);
- if (!ret && ctx->acquired > 0)
+ if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 364ceab15f0..997cbb951a3 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -29,15 +29,15 @@
static struct kmem_cache *nsproxy_cachep;
struct nsproxy init_nsproxy = {
- .count = ATOMIC_INIT(1),
- .uts_ns = &init_uts_ns,
+ .count = ATOMIC_INIT(1),
+ .uts_ns = &init_uts_ns,
#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
- .ipc_ns = &init_ipc_ns,
+ .ipc_ns = &init_ipc_ns,
#endif
- .mnt_ns = NULL,
- .pid_ns = &init_pid_ns,
+ .mnt_ns = NULL,
+ .pid_ns_for_children = &init_pid_ns,
#ifdef CONFIG_NET
- .net_ns = &init_net,
+ .net_ns = &init_net,
#endif
};
@@ -85,9 +85,10 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_ipc;
}
- new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
- if (IS_ERR(new_nsp->pid_ns)) {
- err = PTR_ERR(new_nsp->pid_ns);
+ new_nsp->pid_ns_for_children =
+ copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
+ if (IS_ERR(new_nsp->pid_ns_for_children)) {
+ err = PTR_ERR(new_nsp->pid_ns_for_children);
goto out_pid;
}
@@ -100,8 +101,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
return new_nsp;
out_net:
- if (new_nsp->pid_ns)
- put_pid_ns(new_nsp->pid_ns);
+ if (new_nsp->pid_ns_for_children)
+ put_pid_ns(new_nsp->pid_ns_for_children);
out_pid:
if (new_nsp->ipc_ns)
put_ipc_ns(new_nsp->ipc_ns);
@@ -174,8 +175,8 @@ void free_nsproxy(struct nsproxy *ns)
put_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
put_ipc_ns(ns->ipc_ns);
- if (ns->pid_ns)
- put_pid_ns(ns->pid_ns);
+ if (ns->pid_ns_for_children)
+ put_pid_ns(ns->pid_ns_for_children);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 6917e8edb48..601bb361c23 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -349,8 +349,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
if (ancestor != active)
return -EINVAL;
- put_pid_ns(nsproxy->pid_ns);
- nsproxy->pid_ns = get_pid_ns(new);
+ put_pid_ns(nsproxy->pid_ns_for_children);
+ nsproxy->pid_ns_for_children = get_pid_ns(new);
return 0;
}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fc0df848644..06ec8869dbf 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -109,6 +109,8 @@ static int try_to_freeze_tasks(bool user_only)
/**
* freeze_processes - Signal user space processes to enter the refrigerator.
+ * The current thread will not be frozen. The same process that calls
+ * freeze_processes must later call thaw_processes.
*
* On success, returns 0. On failure, -errno and system is fully thawed.
*/
@@ -120,6 +122,9 @@ int freeze_processes(void)
if (error)
return error;
+ /* Make sure this task doesn't get frozen */
+ current->flags |= PF_SUSPEND_TASK;
+
if (!pm_freezing)
atomic_inc(&system_freezing_cnt);
@@ -168,6 +173,7 @@ int freeze_kernel_threads(void)
void thaw_processes(void)
{
struct task_struct *g, *p;
+ struct task_struct *curr = current;
if (pm_freezing)
atomic_dec(&system_freezing_cnt);
@@ -182,10 +188,15 @@ void thaw_processes(void)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
+ /* No other threads should have PF_SUSPEND_TASK set */
+ WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
__thaw_task(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
+ WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
+ curr->flags &= ~PF_SUSPEND_TASK;
+
usermodehelper_enable();
schedule();
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 06fe28589e9..a394297f8b2 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -296,6 +296,17 @@ int pm_qos_request_active(struct pm_qos_request *req)
}
EXPORT_SYMBOL_GPL(pm_qos_request_active);
+/*
+ * Emit the update-request tracepoint for @req, then push @new_value into
+ * the constraint list of @req's PM QoS class unless the request node
+ * already carries that value.
+ */
+static void __pm_qos_update_request(struct pm_qos_request *req,
+				    s32 new_value)
+{
+	trace_pm_qos_update_request(req->pm_qos_class, new_value);
+
+	/* Nothing to propagate when the requested value is unchanged. */
+	if (req->node.prio == new_value)
+		return;
+
+	pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
+			     &req->node, PM_QOS_UPDATE_REQ, new_value);
+}
+
/**
* pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
* @work: work struct for the delayed work (timeout)
@@ -308,7 +319,7 @@ static void pm_qos_work_fn(struct work_struct *work)
struct pm_qos_request,
work);
- pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
+ __pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
}
/**
@@ -364,12 +375,7 @@ void pm_qos_update_request(struct pm_qos_request *req,
}
cancel_delayed_work_sync(&req->work);
-
- trace_pm_qos_update_request(req->pm_qos_class, new_value);
- if (new_value != req->node.prio)
- pm_qos_update_target(
- pm_qos_array[req->pm_qos_class]->constraints,
- &req->node, PM_QOS_UPDATE_REQ, new_value);
+ __pm_qos_update_request(req, new_value);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
new file mode 100644
index 00000000000..85405bdcf2b
--- /dev/null
+++ b/kernel/printk/Makefile
@@ -0,0 +1,2 @@
+obj-y = printk.o
+obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
new file mode 100644
index 00000000000..276762f3a46
--- /dev/null
+++ b/kernel/printk/braille.c
@@ -0,0 +1,49 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/string.h>
+
+#include "console_cmdline.h"
+#include "braille.h"
+
+/*
+ * Strip an optional braille prefix from a console= argument.
+ *
+ * "brl,<console>" selects braille with empty options and
+ * "brl=<options>,<console>" selects braille with <options>.  In both
+ * cases *brl_options is set and *str is advanced to <console>.  Without
+ * either prefix, *str and *brl_options are left untouched.
+ *
+ * Returns NULL when the caller should carry on parsing *str.  Returns a
+ * non-NULL pointer when "brl=" is not followed by a port name, in which
+ * case console_setup() drops the argument.
+ */
+char *_braille_console_setup(char **str, char **brl_options)
+{
+	char *sep;
+
+	/* strncmp() stops at the terminator, so short strings are safe. */
+	if (!strncmp(*str, "brl,", 4)) {
+		*brl_options = "";
+		*str += 4;
+	} else if (!strncmp(*str, "brl=", 4)) {
+		*brl_options = *str + 4;
+		sep = strchr(*brl_options, ',');
+		if (!sep) {
+			pr_err("need port name after brl=\n");
+			/* Any non-NULL value makes the caller give up. */
+			return *brl_options;
+		}
+		/* Terminate the braille options and step past the comma. */
+		*sep = 0;
+		*str = sep + 1;
+	}
+
+	return NULL;
+}
+
+/*
+ * Hand @console to the braille driver when its command-line entry @c
+ * carries braille options.
+ *
+ * Returns 0 when @c has no braille options, so @console is registered as
+ * an ordinary console.  Returns non-zero once the braille path has been
+ * taken: the value reported by braille_register_console() if that is
+ * non-zero, 1 otherwise.  register_console() therefore always stops after
+ * a braille registration attempt, as it did when this was open-coded.
+ */
+int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+	int rtn;
+
+	if (!c->brl_options)
+		return 0;
+
+	console->flags |= CON_BRL;
+	rtn = braille_register_console(console, c->index, c->options,
+				       c->brl_options);
+
+	/* Never report 0 here: the caller would register the console twice. */
+	return rtn ? rtn : 1;
+}
+
+/*
+ * Unregister @console from the braille driver if it was flagged CON_BRL.
+ * Returns 0 for consoles without that flag, otherwise whatever
+ * braille_unregister_console() returns.
+ */
+int
+_braille_unregister_console(struct console *console)
+{
+	if (!(console->flags & CON_BRL))
+		return 0;
+
+	return braille_unregister_console(console);
+}
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
new file mode 100644
index 00000000000..769d771145c
--- /dev/null
+++ b/kernel/printk/braille.h
@@ -0,0 +1,48 @@
+#ifndef _PRINTK_BRAILLE_H
+#define _PRINTK_BRAILLE_H
+
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+ c->brl_options = brl_options; /* field exists only under this config option */
+}
+
+char *
+_braille_console_setup(char **str, char **brl_options);
+
+int
+_braille_register_console(struct console *console, struct console_cmdline *c);
+
+int
+_braille_unregister_console(struct console *console);
+
+#else /* !CONFIG_A11Y_BRAILLE_CONSOLE: no-op stubs so callers need no #ifdef */
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+}
+
+static inline char *
+_braille_console_setup(char **str, char **brl_options)
+{
+ return NULL; /* str and brl_options are left untouched */
+}
+
+static inline int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+ return 0; /* console is left untouched */
+}
+
+static inline int
+_braille_unregister_console(struct console *console)
+{
+ return 0; /* console is left untouched */
+}
+
+#endif /* CONFIG_A11Y_BRAILLE_CONSOLE */
+
+#endif /* _PRINTK_BRAILLE_H */
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
new file mode 100644
index 00000000000..cbd69d84234
--- /dev/null
+++ b/kernel/printk/console_cmdline.h
@@ -0,0 +1,14 @@
+#ifndef _CONSOLE_CMDLINE_H
+#define _CONSOLE_CMDLINE_H
+
+struct console_cmdline
+{
+ char name[8]; /* Name of the driver */
+ int index; /* Minor dev. to use */
+ char *options; /* Options for the driver */
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+ char *brl_options; /* Options for braille driver */
+#endif
+};
+
+#endif /* _CONSOLE_CMDLINE_H */
diff --git a/kernel/printk.c b/kernel/printk/printk.c
index 69b0890ed7e..5b5a7080e2a 100644
--- a/kernel/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>
+#include "console_cmdline.h"
+#include "braille.h"
+
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
@@ -105,19 +108,11 @@ static struct console *exclusive_console;
/*
* Array of consoles built from command line options (console=)
*/
-struct console_cmdline
-{
- char name[8]; /* Name of the driver */
- int index; /* Minor dev. to use */
- char *options; /* Options for the driver */
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
- char *brl_options; /* Options for braille driver */
-#endif
-};
#define MAX_CMDLINECONSOLES 8
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
+
static int selected_console = -1;
static int preferred_console = -1;
int console_set_on_cmdline;
@@ -178,7 +173,7 @@ static int console_may_schedule;
* 67 "g"
* 0032 00 00 00 padding to next message header
*
- * The 'struct log' buffer header must never be directly exported to
+ * The 'struct printk_log' buffer header must never be directly exported to
* userspace, it is a kernel-private implementation detail that might
* need to be changed in the future, when the requirements change.
*
@@ -200,7 +195,7 @@ enum log_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
-struct log {
+struct printk_log {
u64 ts_nsec; /* timestamp in nanoseconds */
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
@@ -248,7 +243,7 @@ static u32 clear_idx;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define LOG_ALIGN 4
#else
-#define LOG_ALIGN __alignof__(struct log)
+#define LOG_ALIGN __alignof__(struct printk_log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -259,35 +254,35 @@ static u32 log_buf_len = __LOG_BUF_LEN;
static volatile unsigned int logbuf_cpu = UINT_MAX;
/* human readable text of the record */
-static char *log_text(const struct log *msg)
+static char *log_text(const struct printk_log *msg)
{
- return (char *)msg + sizeof(struct log);
+ return (char *)msg + sizeof(struct printk_log);
}
/* optional key/value pair dictionary attached to the record */
-static char *log_dict(const struct log *msg)
+static char *log_dict(const struct printk_log *msg)
{
- return (char *)msg + sizeof(struct log) + msg->text_len;
+ return (char *)msg + sizeof(struct printk_log) + msg->text_len;
}
/* get record by index; idx must point to valid msg */
-static struct log *log_from_idx(u32 idx)
+static struct printk_log *log_from_idx(u32 idx)
{
- struct log *msg = (struct log *)(log_buf + idx);
+ struct printk_log *msg = (struct printk_log *)(log_buf + idx);
/*
* A length == 0 record is the end of buffer marker. Wrap around and
* read the message at the start of the buffer.
*/
if (!msg->len)
- return (struct log *)log_buf;
+ return (struct printk_log *)log_buf;
return msg;
}
/* get next record; idx must point to valid msg */
static u32 log_next(u32 idx)
{
- struct log *msg = (struct log *)(log_buf + idx);
+ struct printk_log *msg = (struct printk_log *)(log_buf + idx);
/* length == 0 indicates the end of the buffer; wrap */
/*
@@ -296,7 +291,7 @@ static u32 log_next(u32 idx)
* return the one after that.
*/
if (!msg->len) {
- msg = (struct log *)log_buf;
+ msg = (struct printk_log *)log_buf;
return msg->len;
}
return idx + msg->len;
@@ -308,11 +303,11 @@ static void log_store(int facility, int level,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
- struct log *msg;
+ struct printk_log *msg;
u32 size, pad_len;
/* number of '\0' padding bytes to next message */
- size = sizeof(struct log) + text_len + dict_len;
+ size = sizeof(struct printk_log) + text_len + dict_len;
pad_len = (-size) & (LOG_ALIGN - 1);
size += pad_len;
@@ -324,7 +319,7 @@ static void log_store(int facility, int level,
else
free = log_first_idx - log_next_idx;
- if (free > size + sizeof(struct log))
+ if (free > size + sizeof(struct printk_log))
break;
/* drop old messages until we have enough contiuous space */
@@ -332,18 +327,18 @@ static void log_store(int facility, int level,
log_first_seq++;
}
- if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
+ if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) {
/*
* This message + an additional empty header does not fit
* at the end of the buffer. Add an empty header with len == 0
* to signify a wrap around.
*/
- memset(log_buf + log_next_idx, 0, sizeof(struct log));
+ memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
log_next_idx = 0;
}
/* fill message */
- msg = (struct log *)(log_buf + log_next_idx);
+ msg = (struct printk_log *)(log_buf + log_next_idx);
memcpy(log_text(msg), text, text_len);
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
@@ -356,7 +351,7 @@ static void log_store(int facility, int level,
else
msg->ts_nsec = local_clock();
memset(log_dict(msg) + dict_len, 0, pad_len);
- msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
+ msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len;
/* insert message */
log_next_idx += msg->len;
@@ -479,7 +474,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct devkmsg_user *user = file->private_data;
- struct log *msg;
+ struct printk_log *msg;
u64 ts_usec;
size_t i;
char cont = '-';
@@ -724,14 +719,14 @@ void log_buf_kexec_setup(void)
VMCOREINFO_SYMBOL(log_first_idx);
VMCOREINFO_SYMBOL(log_next_idx);
/*
- * Export struct log size and field offsets. User space tools can
+ * Export struct printk_log size and field offsets. User space tools can
* parse it and detect any changes to structure down the line.
*/
- VMCOREINFO_STRUCT_SIZE(log);
- VMCOREINFO_OFFSET(log, ts_nsec);
- VMCOREINFO_OFFSET(log, len);
- VMCOREINFO_OFFSET(log, text_len);
- VMCOREINFO_OFFSET(log, dict_len);
+ VMCOREINFO_STRUCT_SIZE(printk_log);
+ VMCOREINFO_OFFSET(printk_log, ts_nsec);
+ VMCOREINFO_OFFSET(printk_log, len);
+ VMCOREINFO_OFFSET(printk_log, text_len);
+ VMCOREINFO_OFFSET(printk_log, dict_len);
}
#endif
@@ -884,7 +879,7 @@ static size_t print_time(u64 ts, char *buf)
(unsigned long)ts, rem_nsec / 1000);
}
-static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
+static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
{
size_t len = 0;
unsigned int prefix = (msg->facility << 3) | msg->level;
@@ -907,7 +902,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
return len;
}
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
bool syslog, char *buf, size_t size)
{
const char *text = log_text(msg);
@@ -969,7 +964,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev,
static int syslog_print(char __user *buf, int size)
{
char *text;
- struct log *msg;
+ struct printk_log *msg;
int len = 0;
text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
@@ -1060,7 +1055,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
idx = clear_idx;
prev = 0;
while (seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
len += msg_print_text(msg, prev, true, NULL, 0);
prev = msg->flags;
@@ -1073,7 +1068,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
idx = clear_idx;
prev = 0;
while (len > size && seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
len -= msg_print_text(msg, prev, true, NULL, 0);
prev = msg->flags;
@@ -1087,7 +1082,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
len = 0;
prev = 0;
while (len >= 0 && seq < next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
int textlen;
textlen = msg_print_text(msg, prev, true, text,
@@ -1233,7 +1228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
error = 0;
while (seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
error += msg_print_text(msg, prev, true, NULL, 0);
idx = log_next(idx);
@@ -1719,10 +1714,10 @@ static struct cont {
u8 level;
bool flushed:1;
} cont;
-static struct log *log_from_idx(u32 idx) { return NULL; }
+static struct printk_log *log_from_idx(u32 idx) { return NULL; }
static u32 log_next(u32 idx) { return 0; }
static void call_console_drivers(int level, const char *text, size_t len) {}
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
bool syslog, char *buf, size_t size) { return 0; }
static size_t cont_print_text(char *text, size_t size) { return 0; }
@@ -1761,23 +1756,23 @@ static int __add_preferred_console(char *name, int idx, char *options,
* See if this tty is not yet registered, and
* if we have a slot free.
*/
- for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
- if (strcmp(console_cmdline[i].name, name) == 0 &&
- console_cmdline[i].index == idx) {
- if (!brl_options)
- selected_console = i;
- return 0;
+ for (i = 0, c = console_cmdline;
+ i < MAX_CMDLINECONSOLES && c->name[0];
+ i++, c++) {
+ if (strcmp(c->name, name) == 0 && c->index == idx) {
+ if (!brl_options)
+ selected_console = i;
+ return 0;
}
+ }
if (i == MAX_CMDLINECONSOLES)
return -E2BIG;
if (!brl_options)
selected_console = i;
- c = &console_cmdline[i];
strlcpy(c->name, name, sizeof(c->name));
c->options = options;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
- c->brl_options = brl_options;
-#endif
+ braille_set_options(c, brl_options);
+
c->index = idx;
return 0;
}
@@ -1790,20 +1785,8 @@ static int __init console_setup(char *str)
char *s, *options, *brl_options = NULL;
int idx;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
- if (!memcmp(str, "brl,", 4)) {
- brl_options = "";
- str += 4;
- } else if (!memcmp(str, "brl=", 4)) {
- brl_options = str + 4;
- str = strchr(brl_options, ',');
- if (!str) {
- printk(KERN_ERR "need port name after brl=\n");
- return 1;
- }
- *(str++) = 0;
- }
-#endif
+ if (_braille_console_setup(&str, &brl_options))
+ return 1;
/*
* Decode str into name, index, options.
@@ -1858,15 +1841,15 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
struct console_cmdline *c;
int i;
- for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
- if (strcmp(console_cmdline[i].name, name) == 0 &&
- console_cmdline[i].index == idx) {
- c = &console_cmdline[i];
- strlcpy(c->name, name_new, sizeof(c->name));
- c->name[sizeof(c->name) - 1] = 0;
- c->options = options;
- c->index = idx_new;
- return i;
+ for (i = 0, c = console_cmdline;
+ i < MAX_CMDLINECONSOLES && c->name[0];
+ i++, c++)
+ if (strcmp(c->name, name) == 0 && c->index == idx) {
+ strlcpy(c->name, name_new, sizeof(c->name));
+ c->name[sizeof(c->name) - 1] = 0;
+ c->options = options;
+ c->index = idx_new;
+ return i;
}
/* not found */
return -1;
@@ -2046,7 +2029,7 @@ void console_unlock(void)
console_cont_flush(text, sizeof(text));
again:
for (;;) {
- struct log *msg;
+ struct printk_log *msg;
size_t len;
int level;
@@ -2241,6 +2224,7 @@ void register_console(struct console *newcon)
int i;
unsigned long flags;
struct console *bcon = NULL;
+ struct console_cmdline *c;
/*
* before we register a new CON_BOOT console, make sure we don't
@@ -2288,30 +2272,25 @@ void register_console(struct console *newcon)
* See if this console matches one we selected on
* the command line.
*/
- for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0];
- i++) {
- if (strcmp(console_cmdline[i].name, newcon->name) != 0)
+ for (i = 0, c = console_cmdline;
+ i < MAX_CMDLINECONSOLES && c->name[0];
+ i++, c++) {
+ if (strcmp(c->name, newcon->name) != 0)
continue;
if (newcon->index >= 0 &&
- newcon->index != console_cmdline[i].index)
+ newcon->index != c->index)
continue;
if (newcon->index < 0)
- newcon->index = console_cmdline[i].index;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
- if (console_cmdline[i].brl_options) {
- newcon->flags |= CON_BRL;
- braille_register_console(newcon,
- console_cmdline[i].index,
- console_cmdline[i].options,
- console_cmdline[i].brl_options);
+ newcon->index = c->index;
+
+ if (_braille_register_console(newcon, c))
return;
- }
-#endif
+
if (newcon->setup &&
newcon->setup(newcon, console_cmdline[i].options) != 0)
break;
newcon->flags |= CON_ENABLED;
- newcon->index = console_cmdline[i].index;
+ newcon->index = c->index;
if (i == selected_console) {
newcon->flags |= CON_CONSDEV;
preferred_console = selected_console;
@@ -2394,13 +2373,13 @@ EXPORT_SYMBOL(register_console);
int unregister_console(struct console *console)
{
struct console *a, *b;
- int res = 1;
+ int res;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
- if (console->flags & CON_BRL)
- return braille_unregister_console(console);
-#endif
+ res = _braille_unregister_console(console);
+ if (res)
+ return res;
+ res = 1;
console_lock();
if (console_drivers == console) {
console_drivers=console->next;
@@ -2666,7 +2645,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
char *line, size_t size, size_t *len)
{
- struct log *msg;
+ struct printk_log *msg;
size_t l = 0;
bool ret = false;
@@ -2778,7 +2757,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
idx = dumper->cur_idx;
prev = 0;
while (seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
l += msg_print_text(msg, prev, true, NULL, 0);
idx = log_next(idx);
@@ -2791,7 +2770,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
idx = dumper->cur_idx;
prev = 0;
while (l > size && seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
l -= msg_print_text(msg, prev, true, NULL, 0);
idx = log_next(idx);
@@ -2806,7 +2785,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
l = 0;
prev = 0;
while (seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct printk_log *msg = log_from_idx(idx);
l += msg_print_text(msg, prev, syslog, buf + l, size - l);
idx = log_next(idx);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4041f5747e7..a146ee327f6 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -469,7 +469,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
/* Architecture-specific hardware disable .. */
ptrace_disable(child);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- flush_ptrace_hw_breakpoint(child);
write_lock_irq(&tasklist_lock);
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb7bfe..05c39f03031 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,6 +933,8 @@ static int effective_prio(struct task_struct *p)
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
+ *
+ * Return: 1 if the task is currently executing. 0 otherwise.
*/
inline int task_curr(const struct task_struct *p)
{
@@ -1482,7 +1484,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
- * Returns %true if @p was woken up, %false if it was already running
+ * Return: %true if @p was woken up, %false if it was already running
* or @state didn't match @p's state.
*/
static int
@@ -1491,7 +1493,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
unsigned long flags;
int cpu, success = 0;
- smp_wmb();
+ /*
+ * If we are going to wake up a thread waiting for CONDITION we
+ * need to ensure that CONDITION=1 done by the caller can not be
+ * reordered with p->state check below. This pairs with mb() in
+ * set_current_state() the waiting thread does.
+ */
+ smp_mb__before_spinlock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
if (!(p->state & state))
goto out;
@@ -1577,8 +1585,9 @@ out:
* @p: The process to be woken up.
*
* Attempt to wake up the nominated process and move it to the set of runnable
- * processes. Returns 1 if the process was woken up, 0 if it was already
- * running.
+ * processes.
+ *
+ * Return: 1 if the process was woken up, 0 if it was already running.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
@@ -2191,6 +2200,8 @@ void scheduler_tick(void)
* This makes sure that uptime, CFS vruntime, load
* balancing, etc... continue to move forward, even
* with a very low granularity.
+ *
+ * Return: Maximum deferment in nanoseconds.
*/
u64 scheduler_tick_max_deferment(void)
{
@@ -2394,6 +2405,12 @@ need_resched:
if (sched_feat(HRTICK))
hrtick_clear(rq);
+ /*
+ * Make sure that signal_pending_state()->signal_pending() below
+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+ * done by the caller to avoid the race with signal_wake_up().
+ */
+ smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
switch_count = &prev->nivcsw;
@@ -2796,8 +2813,8 @@ EXPORT_SYMBOL(wait_for_completion);
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible.
*
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -2829,8 +2846,8 @@ EXPORT_SYMBOL(wait_for_completion_io);
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible. The caller is accounted as waiting for IO.
*
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@ -2846,7 +2863,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
* This waits for completion of a specific task to be signaled. It is
* interruptible.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_interruptible(struct completion *x)
{
@@ -2865,8 +2882,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
@@ -2883,7 +2900,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
* This waits to be signaled for completion of a specific task. It can be
* interrupted by a kill signal.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_killable(struct completion *x)
{
@@ -2903,8 +2920,8 @@ EXPORT_SYMBOL(wait_for_completion_killable);
* signaled or for a specified timeout to expire. It can be
* interrupted by a kill signal. The timeout is in jiffies.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_killable_timeout(struct completion *x,
@@ -2918,7 +2935,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
* try_wait_for_completion - try to decrement a completion without blocking
* @x: completion structure
*
- * Returns: 0 if a decrement cannot be done without blocking
+ * Return: 0 if a decrement cannot be done without blocking
* 1 if a decrement succeeded.
*
* If a completion is being used as a counting completion,
@@ -2945,7 +2962,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
* completion_done - Test to see if a completion has any waiters
* @x: completion structure
*
- * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ * Return: 0 if there are waiters (wait_for_completion() in progress)
* 1 if there are no waiters.
*
*/
@@ -3182,7 +3199,7 @@ SYSCALL_DEFINE1(nice, int, increment)
* task_prio - return the priority value of a given task.
* @p: the task in question.
*
- * This is the priority value as seen by users in /proc.
+ * Return: The priority value as seen by users in /proc.
* RT tasks are offset by -200. Normal tasks are centered
* around 0, value goes from -16 to +15.
*/
@@ -3194,6 +3211,8 @@ int task_prio(const struct task_struct *p)
/**
* task_nice - return the nice value of a given task.
* @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
*/
int task_nice(const struct task_struct *p)
{
@@ -3204,6 +3223,8 @@ EXPORT_SYMBOL(task_nice);
/**
* idle_cpu - is a given cpu idle currently?
* @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
*/
int idle_cpu(int cpu)
{
@@ -3226,6 +3247,8 @@ int idle_cpu(int cpu)
/**
* idle_task - return the idle task for a given cpu.
* @cpu: the processor in question.
+ *
+ * Return: The idle task for the cpu @cpu.
*/
struct task_struct *idle_task(int cpu)
{
@@ -3235,6 +3258,8 @@ struct task_struct *idle_task(int cpu)
/**
* find_process_by_pid - find a process with a matching PID value.
* @pid: the pid in question.
+ *
+ * Return: The task of @pid, if found. %NULL otherwise.
*/
static struct task_struct *find_process_by_pid(pid_t pid)
{
@@ -3432,6 +3457,8 @@ recheck:
* @policy: new policy.
* @param: structure containing the new RT priority.
*
+ * Return: 0 on success. An error code otherwise.
+ *
* NOTE that the task may be already dead.
*/
int sched_setscheduler(struct task_struct *p, int policy,
@@ -3451,6 +3478,8 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
* current context has permission. For example, this is needed in
* stop_machine(): we create temporary high priority worker threads,
* but our caller might not have that capability.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
int sched_setscheduler_nocheck(struct task_struct *p, int policy,
const struct sched_param *param)
@@ -3485,6 +3514,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @pid: the pid in question.
* @policy: new policy.
* @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
struct sched_param __user *, param)
@@ -3500,6 +3531,8 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
* sys_sched_setparam - set/change the RT priority of a thread
* @pid: the pid in question.
* @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
@@ -3509,6 +3542,9 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
/**
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
*/
SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
@@ -3535,6 +3571,9 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
* sys_sched_getparam - get the RT priority of a thread
* @pid: the pid in question.
* @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
*/
SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
@@ -3659,6 +3698,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
* @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
@@ -3710,6 +3751,8 @@ out_unlock:
* @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
@@ -3744,6 +3787,8 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
*
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
+ *
+ * Return: 0.
*/
SYSCALL_DEFINE0(sched_yield)
{
@@ -3869,7 +3914,7 @@ EXPORT_SYMBOL(yield);
* It's the caller's job to ensure that the target task struct
* can't go away on us before we can do any checks.
*
- * Returns:
+ * Return:
* true (>0) if we indeed boosted the target task.
* false (0) if we failed to boost the target.
* -ESRCH if there's no task to yield to.
@@ -3972,8 +4017,9 @@ long __sched io_schedule_timeout(long timeout)
* sys_sched_get_priority_max - return maximum RT priority.
* @policy: scheduling class.
*
- * this syscall returns the maximum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
*/
SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
@@ -3997,8 +4043,9 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
* sys_sched_get_priority_min - return minimum RT priority.
* @policy: scheduling class.
*
- * this syscall returns the minimum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the minimum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
*/
SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
@@ -4024,6 +4071,9 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
*
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
*/
SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
struct timespec __user *, interval)
@@ -6632,6 +6682,8 @@ void normalize_rt_tasks(void)
* @cpu: the processor in question.
*
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
*/
struct task_struct *curr_task(int cpu)
{
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 1095e878a46..8b836b376d9 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -62,7 +62,7 @@ static int convert_prio(int prio)
* any discrepancies created by racing against the uncertainty of the current
* priority configuration.
*
- * Returns: (int)bool - CPUs were found
+ * Return: (int)bool - CPUs were found
*/
int cpupri_find(struct cpupri *cp, struct task_struct *p,
struct cpumask *lowest_mask)
@@ -203,7 +203,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
* cpupri_init - initialize the cpupri structure
* @cp: The cpupri context
*
- * Returns: -ENOMEM if memory fails.
+ * Return: -ENOMEM on memory allocation failure.
*/
int cpupri_init(struct cpupri *cp)
{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb456f44b7b..68f1609ca14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -851,7 +851,7 @@ void task_numa_fault(int node, int pages, bool migrated)
{
struct task_struct *p = current;
- if (!sched_feat_numa(NUMA))
+ if (!numabalancing_enabled)
return;
/* FIXME: Allocate task-specific structure for placement policy here */
@@ -2032,6 +2032,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
*/
update_entity_load_avg(curr, 1);
update_cfs_rq_blocked_load(cfs_rq, 1);
+ update_cfs_shares(cfs_rq);
#ifdef CONFIG_SCHED_HRTICK
/*
@@ -4280,6 +4281,8 @@ struct sg_lb_stats {
* get_sd_load_idx - Obtain the load index for a given sched domain.
* @sd: The sched_domain whose load_idx is to be obtained.
* @idle: The Idle status of the CPU for whose sd load_icx is obtained.
+ *
+ * Return: The load index.
*/
static inline int get_sd_load_idx(struct sched_domain *sd,
enum cpu_idle_type idle)
@@ -4574,6 +4577,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
*
* Determine if @sg is a busier group than the previously selected
* busiest group.
+ *
+ * Return: %true if @sg is a busier group than the previously selected
+ * busiest group. %false otherwise.
*/
static bool update_sd_pick_busiest(struct lb_env *env,
struct sd_lb_stats *sds,
@@ -4691,7 +4697,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
* assuming lower CPU number will be equivalent to lower a SMT thread
* number.
*
- * Returns 1 when packing is required and a task should be moved to
+ * Return: 1 when packing is required and a task should be moved to
* this CPU. The amount of the imbalance is returned in *imbalance.
*
* @env: The load balancing environment.
@@ -4869,7 +4875,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* @balance: Pointer to a variable indicating if this_cpu
* is the appropriate cpu to perform load balancing at this_level.
*
- * Returns: - the busiest group if imbalance exists.
+ * Return: - The busiest group if imbalance exists.
* - If no imbalance and user has opted for power-savings balance,
* return the least loaded group whose CPUs can be
* put to idle by rebalancing its tasks onto our group.
@@ -5786,7 +5792,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
entity_tick(cfs_rq, se, queued);
}
- if (sched_feat_numa(NUMA))
+ if (numabalancing_enabled)
task_tick_numa(rq, curr);
update_rq_runnable_avg(rq, 1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ac09d98490a..07f6fc468e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2346,7 +2346,11 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
int write, void *data)
{
if (write) {
- *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
+ unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
+
+ if (jif > INT_MAX)
+ return 1;
+ *valp = (int)jif;
} else {
int val = *valp;
unsigned long lval;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27d7f0..0b479a6a22b 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
BUG_ON(bits > 32);
WARN_ON(!irqs_disabled());
read_sched_clock = read;
- sched_clock_mask = (1 << bits) - 1;
+ sched_clock_mask = (1ULL << bits) - 1;
cd.rate = rate;
/* calculate the mult/shift to convert counter ticks to ns. */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e80183f4a6c..e8a1516cc0a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -182,7 +182,8 @@ static bool can_stop_full_tick(void)
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
- WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
+ WARN_ONCE(have_nohz_full_mask,
+ "NO_HZ FULL will not work with unstable sched clock");
return false;
}
#endif
@@ -343,8 +344,6 @@ static int tick_nohz_init_all(void)
void __init tick_nohz_init(void)
{
- int cpu;
-
if (!have_nohz_full_mask) {
if (tick_nohz_init_all() < 0)
return;
@@ -827,13 +826,10 @@ void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
- if (ts->inidle) {
- /* Cancel the timer because CPU already waken up from the C-states*/
- menu_hrtimer_cancel();
+ if (ts->inidle)
__tick_nohz_idle_enter(ts);
- } else {
+ else
tick_nohz_full_stop_tick(ts);
- }
}
/**
@@ -931,8 +927,6 @@ void tick_nohz_idle_exit(void)
ts->inidle = 0;
- /* Cancel the timer because CPU already waken up from the C-states*/
- menu_hrtimer_cancel();
if (ts->idle_active || ts->tick_stopped)
now = ktime_get();
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 3bdf2832301..61ed862cdd3 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
static int timer_list_show(struct seq_file *m, void *v)
{
struct timer_list_iter *iter = v;
- u64 now = ktime_to_ns(ktime_get());
if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, now);
+ timer_list_header(m, iter->now);
else if (!iter->second_pass)
print_cpu(m, iter->cpu, iter->now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -298,33 +297,41 @@ void sysrq_timer_list_show(void)
return;
}
-static void *timer_list_start(struct seq_file *file, loff_t *offset)
+static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
- struct timer_list_iter *iter = file->private;
-
- if (!*offset) {
- iter->cpu = -1;
- iter->now = ktime_to_ns(ktime_get());
- } else if (iter->cpu >= nr_cpu_ids) {
+ for (; offset; offset--) {
+ iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
+ if (iter->cpu >= nr_cpu_ids) {
#ifdef CONFIG_GENERIC_CLOCKEVENTS
- if (!iter->second_pass) {
- iter->cpu = -1;
- iter->second_pass = true;
- } else
- return NULL;
+ if (!iter->second_pass) {
+ iter->cpu = -1;
+ iter->second_pass = true;
+ } else
+ return NULL;
#else
- return NULL;
+ return NULL;
#endif
+ }
}
return iter;
}
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+ struct timer_list_iter *iter = file->private;
+
+ if (!*offset)
+ iter->now = ktime_to_ns(ktime_get());
+ iter->cpu = -1;
+ iter->second_pass = false;
+ return move_iter(iter, *offset);
+}
+
static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
{
struct timer_list_iter *iter = file->private;
- iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
++*offset;
- return timer_list_start(file, offset);
+ return move_iter(iter, 1);
}
static void timer_list_stop(struct seq_file *seq, void *v)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8ce9eefc5bb..a6d098c6df3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2169,12 +2169,57 @@ static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
-static int ops_traces_mod(struct ftrace_ops *ops)
+static inline int ops_traces_mod(struct ftrace_ops *ops)
{
- struct ftrace_hash *hash;
+ /*
+ * An empty filter_hash defaults to tracing the module's functions,
+ * but a non-empty notrace_hash requires testing each module function.
+ */
+ return ftrace_hash_empty(ops->filter_hash) &&
+ ftrace_hash_empty(ops->notrace_hash);
+}
+
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static inline bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+ /* If ops isn't enabled, ignore it */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return 0;
+
+ /* If ops traces all mods, we already accounted for it */
+ if (ops_traces_mod(ops))
+ return 0;
+
+ /* The function must be in the filter */
+ if (!ftrace_hash_empty(ops->filter_hash) &&
+ !ftrace_lookup_ip(ops->filter_hash, rec->ip))
+ return 0;
+
+ /* If in notrace hash, we ignore it too */
+ if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
+ return 0;
+
+ return 1;
+}
+
+static int referenced_filters(struct dyn_ftrace *rec)
+{
+ struct ftrace_ops *ops;
+ int cnt = 0;
- hash = ops->filter_hash;
- return ftrace_hash_empty(hash);
+ for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
+ if (ops_references_rec(ops, rec))
+ cnt++;
+ }
+
+ return cnt;
}
static int ftrace_update_code(struct module *mod)
@@ -2183,6 +2228,7 @@ static int ftrace_update_code(struct module *mod)
struct dyn_ftrace *p;
cycle_t start, stop;
unsigned long ref = 0;
+ bool test = false;
int i;
/*
@@ -2196,9 +2242,12 @@ static int ftrace_update_code(struct module *mod)
for (ops = ftrace_ops_list;
ops != &ftrace_list_end; ops = ops->next) {
- if (ops->flags & FTRACE_OPS_FL_ENABLED &&
- ops_traces_mod(ops))
- ref++;
+ if (ops->flags & FTRACE_OPS_FL_ENABLED) {
+ if (ops_traces_mod(ops))
+ ref++;
+ else
+ test = true;
+ }
}
}
@@ -2208,12 +2257,16 @@ static int ftrace_update_code(struct module *mod)
for (pg = ftrace_new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
+ int cnt = ref;
+
/* If something went wrong, bail without enabling anything */
if (unlikely(ftrace_disabled))
return -1;
p = &pg->records[i];
- p->flags = ref;
+ if (test)
+ cnt += referenced_filters(p);
+ p->flags = cnt;
/*
* Do the initial record conversion from mcount jump
@@ -2233,7 +2286,7 @@ static int ftrace_update_code(struct module *mod)
* conversion puts the module to the correct state, thus
* passing the ftrace_make_call check.
*/
- if (ftrace_start_up && ref) {
+ if (ftrace_start_up && cnt) {
int failed = __ftrace_replace_code(p, 1);
if (failed)
ftrace_bug(failed, p->ip);
@@ -3384,6 +3437,12 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return add_hash_entry(hash, ip);
}
+static void ftrace_ops_update_code(struct ftrace_ops *ops)
+{
+ if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+}
+
static int
ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
unsigned long ip, int remove, int reset, int enable)
@@ -3426,9 +3485,8 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
mutex_lock(&ftrace_lock);
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
- if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
- && ftrace_enabled)
- ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+ if (!ret)
+ ftrace_ops_update_code(ops);
mutex_unlock(&ftrace_lock);
@@ -3655,9 +3713,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
mutex_lock(&ftrace_lock);
ret = ftrace_hash_move(iter->ops, filter_hash,
orig_hash, iter->hash);
- if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
- && ftrace_enabled)
- ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+ if (!ret)
+ ftrace_ops_update_code(iter->ops);
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 882ec1dd151..496f94d5769 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -243,20 +243,25 @@ int filter_current_check_discard(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
-cycle_t ftrace_now(int cpu)
+cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
u64 ts;
/* Early boot up does not have a buffer yet */
- if (!global_trace.trace_buffer.buffer)
+ if (!buf->buffer)
return trace_clock_local();
- ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
- ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
+ ts = ring_buffer_time_stamp(buf->buffer, cpu);
+ ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
return ts;
}
+cycle_t ftrace_now(int cpu)
+{
+ return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
+}
+
/**
* tracing_is_enabled - Show if global_trace has been disabled
*
@@ -1211,7 +1216,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
/* Make sure all commits have finished */
synchronize_sched();
- buf->time_start = ftrace_now(buf->cpu);
+ buf->time_start = buffer_ftrace_now(buf, buf->cpu);
for_each_online_cpu(cpu)
ring_buffer_reset_cpu(buffer, cpu);
@@ -1219,11 +1224,6 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
ring_buffer_record_enable(buffer);
}
-void tracing_reset_current(int cpu)
-{
- tracing_reset(&global_trace.trace_buffer, cpu);
-}
-
/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus(void)
{
@@ -4151,6 +4151,7 @@ waitagain:
memset(&iter->seq, 0,
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
+ cpumask_clear(iter->started);
iter->pos = -1;
trace_event_read_lock();
@@ -4468,7 +4469,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
/* disable tracing ? */
if (trace_flags & TRACE_ITER_STOP_ON_FREE)
- tracing_off();
+ tracer_tracing_off(tr);
/* resize the ring buffer to 0 */
tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
@@ -4633,12 +4634,12 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
* New clock may not be consistent with the previous clock.
* Reset the buffer so that it doesn't have incomparable timestamps.
*/
- tracing_reset_online_cpus(&global_trace.trace_buffer);
+ tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
- tracing_reset_online_cpus(&global_trace.max_buffer);
+ tracing_reset_online_cpus(&tr->max_buffer);
#endif
mutex_unlock(&trace_types_lock);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 898f868833f..29a7ebcfb42 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -409,33 +409,42 @@ static void put_system(struct ftrace_subsystem_dir *dir)
mutex_unlock(&event_mutex);
}
-/*
- * Open and update trace_array ref count.
- * Must have the current trace_array passed to it.
- */
-static int tracing_open_generic_file(struct inode *inode, struct file *filp)
+static void remove_subsystem(struct ftrace_subsystem_dir *dir)
{
- struct ftrace_event_file *file = inode->i_private;
- struct trace_array *tr = file->tr;
- int ret;
+ if (!dir)
+ return;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ if (!--dir->nr_events) {
+ debugfs_remove_recursive(dir->entry);
+ list_del(&dir->list);
+ __put_system_dir(dir);
+ }
+}
- ret = tracing_open_generic(inode, filp);
- if (ret < 0)
- trace_array_put(tr);
- return ret;
+static void *event_file_data(struct file *filp)
+{
+ return ACCESS_ONCE(file_inode(filp)->i_private);
}
-static int tracing_release_generic_file(struct inode *inode, struct file *filp)
+static void remove_event_file_dir(struct ftrace_event_file *file)
{
- struct ftrace_event_file *file = inode->i_private;
- struct trace_array *tr = file->tr;
+ struct dentry *dir = file->dir;
+ struct dentry *child;
- trace_array_put(tr);
+ if (dir) {
+ spin_lock(&dir->d_lock); /* probably unneeded */
+ list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
+ if (child->d_inode) /* probably unneeded */
+ child->d_inode->i_private = NULL;
+ }
+ spin_unlock(&dir->d_lock);
- return 0;
+ debugfs_remove_recursive(dir);
+ }
+
+ list_del(&file->list);
+ remove_subsystem(file->system);
+ kmem_cache_free(file_cachep, file);
}
/*
@@ -679,15 +688,25 @@ static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file = filp->private_data;
+ struct ftrace_event_file *file;
+ unsigned long flags;
char buf[4] = "0";
- if (file->flags & FTRACE_EVENT_FL_ENABLED &&
- !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ mutex_lock(&event_mutex);
+ file = event_file_data(filp);
+ if (likely(file))
+ flags = file->flags;
+ mutex_unlock(&event_mutex);
+
+ if (!file)
+ return -ENODEV;
+
+ if (flags & FTRACE_EVENT_FL_ENABLED &&
+ !(flags & FTRACE_EVENT_FL_SOFT_DISABLED))
strcpy(buf, "1");
- if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
- file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+ if (flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
+ flags & FTRACE_EVENT_FL_SOFT_MODE)
strcat(buf, "*");
strcat(buf, "\n");
@@ -699,13 +718,10 @@ static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file = filp->private_data;
+ struct ftrace_event_file *file;
unsigned long val;
int ret;
- if (!file)
- return -EINVAL;
-
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
if (ret)
return ret;
@@ -717,8 +733,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
switch (val) {
case 0:
case 1:
+ ret = -ENODEV;
mutex_lock(&event_mutex);
- ret = ftrace_event_enable_disable(file, val);
+ file = event_file_data(filp);
+ if (likely(file))
+ ret = ftrace_event_enable_disable(file, val);
mutex_unlock(&event_mutex);
break;
@@ -825,7 +844,7 @@ enum {
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = m->private;
+ struct ftrace_event_call *call = event_file_data(m->private);
struct list_head *common_head = &ftrace_common_fields;
struct list_head *head = trace_get_fields(call);
struct list_head *node = v;
@@ -857,7 +876,7 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
static int f_show(struct seq_file *m, void *v)
{
- struct ftrace_event_call *call = m->private;
+ struct ftrace_event_call *call = event_file_data(m->private);
struct ftrace_event_field *field;
const char *array_descriptor;
@@ -910,6 +929,11 @@ static void *f_start(struct seq_file *m, loff_t *pos)
void *p = (void *)FORMAT_HEADER;
loff_t l = 0;
+ /* ->stop() is called even if ->start() fails */
+ mutex_lock(&event_mutex);
+ if (!event_file_data(m->private))
+ return ERR_PTR(-ENODEV);
+
while (l < *pos && p)
p = f_next(m, p, &l);
@@ -918,6 +942,7 @@ static void *f_start(struct seq_file *m, loff_t *pos)
static void f_stop(struct seq_file *m, void *p)
{
+ mutex_unlock(&event_mutex);
}
static const struct seq_operations trace_format_seq_ops = {
@@ -929,7 +954,6 @@ static const struct seq_operations trace_format_seq_ops = {
static int trace_format_open(struct inode *inode, struct file *file)
{
- struct ftrace_event_call *call = inode->i_private;
struct seq_file *m;
int ret;
@@ -938,7 +962,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return ret;
m = file->private_data;
- m->private = call;
+ m->private = file;
return 0;
}
@@ -946,14 +970,18 @@ static int trace_format_open(struct inode *inode, struct file *file)
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ int id = (long)event_file_data(filp);
char buf[32];
int len;
if (*ppos)
return 0;
- len = sprintf(buf, "%d\n", call->event.type);
+ if (unlikely(!id))
+ return -ENODEV;
+
+ len = sprintf(buf, "%d\n", id);
+
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
@@ -961,21 +989,28 @@ static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call;
struct trace_seq *s;
- int r;
+ int r = -ENODEV;
if (*ppos)
return 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
+
if (!s)
return -ENOMEM;
trace_seq_init(s);
- print_event_filter(call, s);
- r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+ mutex_lock(&event_mutex);
+ call = event_file_data(filp);
+ if (call)
+ print_event_filter(call, s);
+ mutex_unlock(&event_mutex);
+
+ if (call)
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
@@ -986,9 +1021,9 @@ static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call;
char *buf;
- int err;
+ int err = -ENODEV;
if (cnt >= PAGE_SIZE)
return -EINVAL;
@@ -1003,7 +1038,12 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
}
buf[cnt] = '\0';
- err = apply_event_filter(call, buf);
+ mutex_lock(&event_mutex);
+ call = event_file_data(filp);
+ if (call)
+ err = apply_event_filter(call, buf);
+ mutex_unlock(&event_mutex);
+
free_page((unsigned long) buf);
if (err < 0)
return err;
@@ -1225,10 +1265,9 @@ static const struct file_operations ftrace_set_event_fops = {
};
static const struct file_operations ftrace_enable_fops = {
- .open = tracing_open_generic_file,
+ .open = tracing_open_generic,
.read = event_enable_read,
.write = event_enable_write,
- .release = tracing_release_generic_file,
.llseek = default_llseek,
};
@@ -1240,7 +1279,6 @@ static const struct file_operations ftrace_event_format_fops = {
};
static const struct file_operations ftrace_event_id_fops = {
- .open = tracing_open_generic,
.read = event_id_read,
.llseek = default_llseek,
};
@@ -1488,8 +1526,8 @@ event_create_dir(struct dentry *parent,
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
- trace_create_file("id", 0444, file->dir, call,
- id);
+ trace_create_file("id", 0444, file->dir,
+ (void *)(long)call->event.type, id);
#endif
/*
@@ -1514,33 +1552,16 @@ event_create_dir(struct dentry *parent,
return 0;
}
-static void remove_subsystem(struct ftrace_subsystem_dir *dir)
-{
- if (!dir)
- return;
-
- if (!--dir->nr_events) {
- debugfs_remove_recursive(dir->entry);
- list_del(&dir->list);
- __put_system_dir(dir);
- }
-}
-
static void remove_event_from_tracers(struct ftrace_event_call *call)
{
struct ftrace_event_file *file;
struct trace_array *tr;
do_for_each_event_file_safe(tr, file) {
-
if (file->event_call != call)
continue;
- list_del(&file->list);
- debugfs_remove_recursive(file->dir);
- remove_subsystem(file->system);
- kmem_cache_free(file_cachep, file);
-
+ remove_event_file_dir(file);
/*
* The do_for_each_event_file_safe() is
* a double loop. After finding the call for this
@@ -1692,16 +1713,53 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
destroy_preds(call);
}
+static int probe_remove_event_call(struct ftrace_event_call *call)
+{
+ struct trace_array *tr;
+ struct ftrace_event_file *file;
+
+#ifdef CONFIG_PERF_EVENTS
+ if (call->perf_refcount)
+ return -EBUSY;
+#endif
+ do_for_each_event_file(tr, file) {
+ if (file->event_call != call)
+ continue;
+ /*
+ * We can't rely on ftrace_event_enable_disable(enable => 0)
+ * that we are going to do, because FTRACE_EVENT_FL_SOFT_MODE
+ * can suppress TRACE_REG_UNREGISTER.
+ */
+ if (file->flags & FTRACE_EVENT_FL_ENABLED)
+ return -EBUSY;
+ /*
+ * The do_for_each_event_file() is
+ * a double loop. After finding the call for this
+ * trace_array, we use break to jump to the next
+ * trace_array.
+ */
+ break;
+ } while_for_each_event_file();
+
+ __trace_remove_event_call(call);
+
+ return 0;
+}
+
/* Remove an event_call */
-void trace_remove_event_call(struct ftrace_event_call *call)
+int trace_remove_event_call(struct ftrace_event_call *call)
{
+ int ret;
+
mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
down_write(&trace_event_sem);
- __trace_remove_event_call(call);
+ ret = probe_remove_event_call(call);
up_write(&trace_event_sem);
mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
+
+ return ret;
}
#define for_each_event(event, start, end) \
@@ -2270,12 +2328,8 @@ __trace_remove_event_dirs(struct trace_array *tr)
{
struct ftrace_event_file *file, *next;
- list_for_each_entry_safe(file, next, &tr->events, list) {
- list_del(&file->list);
- debugfs_remove_recursive(file->dir);
- remove_subsystem(file->system);
- kmem_cache_free(file_cachep, file);
- }
+ list_for_each_entry_safe(file, next, &tr->events, list)
+ remove_event_file_dir(file);
}
static void
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0c7b75a8acc..97daa8cf958 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,17 +637,15 @@ static void append_filter_err(struct filter_parse_state *ps,
free_page((unsigned long) buf);
}
+/* caller must hold event_mutex */
void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
- struct event_filter *filter;
+ struct event_filter *filter = call->filter;
- mutex_lock(&event_mutex);
- filter = call->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_puts(s, "none\n");
- mutex_unlock(&event_mutex);
}
void print_subsystem_event_filter(struct event_subsystem *system,
@@ -1841,23 +1839,22 @@ static int create_system_filter(struct event_subsystem *system,
return err;
}
+/* caller must hold event_mutex */
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
struct event_filter *filter;
- int err = 0;
-
- mutex_lock(&event_mutex);
+ int err;
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(call);
filter = call->filter;
if (!filter)
- goto out_unlock;
+ return 0;
RCU_INIT_POINTER(call->filter, NULL);
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
- goto out_unlock;
+ return 0;
}
err = create_filter(call, filter_string, true, &filter);
@@ -1884,8 +1881,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
__free_filter(tmp);
}
}
-out_unlock:
- mutex_unlock(&event_mutex);
return err;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3811487e7a7..243f6834d02 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -95,7 +95,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
}
static int register_probe_event(struct trace_probe *tp);
-static void unregister_probe_event(struct trace_probe *tp);
+static int unregister_probe_event(struct trace_probe *tp);
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
@@ -351,9 +351,12 @@ static int unregister_trace_probe(struct trace_probe *tp)
if (trace_probe_is_enabled(tp))
return -EBUSY;
+ /* Will fail if probe is being used by ftrace or perf */
+ if (unregister_probe_event(tp))
+ return -EBUSY;
+
__unregister_trace_probe(tp);
list_del(&tp->list);
- unregister_probe_event(tp);
return 0;
}
@@ -632,7 +635,9 @@ static int release_all_trace_probes(void)
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
tp = list_entry(probe_list.next, struct trace_probe, list);
- unregister_trace_probe(tp);
+ ret = unregister_trace_probe(tp);
+ if (ret)
+ goto end;
free_trace_probe(tp);
}
@@ -1247,11 +1252,15 @@ static int register_probe_event(struct trace_probe *tp)
return ret;
}
-static void unregister_probe_event(struct trace_probe *tp)
+static int unregister_probe_event(struct trace_probe *tp)
{
+ int ret;
+
/* tp->event is unregistered in trace_remove_event_call() */
- trace_remove_event_call(&tp->call);
- kfree(tp->call.print_fmt);
+ ret = trace_remove_event_call(&tp->call);
+ if (!ret)
+ kfree(tp->call.print_fmt);
+ return ret;
}
/* Make a debugfs interface for controlling probe points */
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index a23d2d71188..272261b5f94 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -70,7 +70,7 @@ struct trace_uprobe {
(sizeof(struct probe_arg) * (n)))
static int register_uprobe_event(struct trace_uprobe *tu);
-static void unregister_uprobe_event(struct trace_uprobe *tu);
+static int unregister_uprobe_event(struct trace_uprobe *tu);
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);
@@ -164,11 +164,17 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
}
/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
-static void unregister_trace_uprobe(struct trace_uprobe *tu)
+static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
+ int ret;
+
+ ret = unregister_uprobe_event(tu);
+ if (ret)
+ return ret;
+
list_del(&tu->list);
- unregister_uprobe_event(tu);
free_trace_uprobe(tu);
+ return 0;
}
/* Register a trace_uprobe and probe_event */
@@ -181,9 +187,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
/* register as an event */
old_tp = find_probe_event(tu->call.name, tu->call.class->system);
- if (old_tp)
+ if (old_tp) {
/* delete old event */
- unregister_trace_uprobe(old_tp);
+ ret = unregister_trace_uprobe(old_tp);
+ if (ret)
+ goto end;
+ }
ret = register_uprobe_event(tu);
if (ret) {
@@ -256,6 +265,8 @@ static int create_trace_uprobe(int argc, char **argv)
group = UPROBE_EVENT_SYSTEM;
if (is_delete) {
+ int ret;
+
if (!event) {
pr_info("Delete command needs an event name.\n");
return -EINVAL;
@@ -269,9 +280,9 @@ static int create_trace_uprobe(int argc, char **argv)
return -ENOENT;
}
/* delete an event */
- unregister_trace_uprobe(tu);
+ ret = unregister_trace_uprobe(tu);
mutex_unlock(&uprobe_lock);
- return 0;
+ return ret;
}
if (argc < 2) {
@@ -408,16 +419,20 @@ fail_address_parse:
return ret;
}
-static void cleanup_all_probes(void)
+static int cleanup_all_probes(void)
{
struct trace_uprobe *tu;
+ int ret = 0;
mutex_lock(&uprobe_lock);
while (!list_empty(&uprobe_list)) {
tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
- unregister_trace_uprobe(tu);
+ ret = unregister_trace_uprobe(tu);
+ if (ret)
+ break;
}
mutex_unlock(&uprobe_lock);
+ return ret;
}
/* Probes listing interfaces */
@@ -462,8 +477,13 @@ static const struct seq_operations probes_seq_op = {
static int probes_open(struct inode *inode, struct file *file)
{
- if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
- cleanup_all_probes();
+ int ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = cleanup_all_probes();
+ if (ret)
+ return ret;
+ }
return seq_open(file, &probes_seq_op);
}
@@ -968,12 +988,17 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return ret;
}
-static void unregister_uprobe_event(struct trace_uprobe *tu)
+static int unregister_uprobe_event(struct trace_uprobe *tu)
{
+ int ret;
+
/* tu->event is unregistered in trace_remove_event_call() */
- trace_remove_event_call(&tu->call);
+ ret = trace_remove_event_call(&tu->call);
+ if (ret)
+ return ret;
kfree(tu->call.print_fmt);
tu->call.print_fmt = NULL;
+ return 0;
}
/* Make a trace interface for controling probe points */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d8c30db06c5..9064b919a40 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -62,6 +62,9 @@ int create_user_ns(struct cred *new)
kgid_t group = new->egid;
int ret;
+ if (parent_ns->level > 32)
+ return -EUSERS;
+
/*
* Verify that we can not violate the policy of which files
* may be accessed that is specified by the root directory,
@@ -92,6 +95,7 @@ int create_user_ns(struct cred *new)
atomic_set(&ns->count, 1);
/* Leave the new->user_ns reference with the new user namespace. */
ns->parent = parent_ns;
+ ns->level = parent_ns->level + 1;
ns->owner = owner;
ns->group = group;
@@ -105,16 +109,21 @@ int create_user_ns(struct cred *new)
int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
struct cred *cred;
+ int err = -ENOMEM;
if (!(unshare_flags & CLONE_NEWUSER))
return 0;
cred = prepare_creds();
- if (!cred)
- return -ENOMEM;
+ if (cred) {
+ err = create_user_ns(cred);
+ if (err)
+ put_cred(cred);
+ else
+ *new_cred = cred;
+ }
- *new_cred = cred;
- return create_user_ns(cred);
+ return err;
}
void free_user_ns(struct user_namespace *ns)
diff --git a/kernel/wait.c b/kernel/wait.c
index dec68bd4e9d..d550920e040 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -363,8 +363,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
/**
* wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @word: The word being waited on, a kernel virtual address
- * @bit: The bit of the word being waited on
+ * @p: The atomic_t being waited on, a kernel virtual address
*
* Wake up anyone waiting for the atomic_t to go to zero.
*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d1b5f066265..5f8ee91abdf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2201,6 +2201,15 @@ __acquires(&pool->lock)
dump_stack();
}
+ /*
+ * The following prevents a kworker from hogging CPU on !PREEMPT
+ * kernels, where a requeueing work item waiting for something to
+ * happen could deadlock with stop_machine as such work item could
+ * indefinitely requeue itself while all other CPUs are trapped in
+ * stop_machine.
+ */
+ cond_resched();
+
spin_lock_irq(&pool->lock);
/* clear cpu intensive status */
@@ -2817,6 +2826,19 @@ already_gone:
return false;
}
+static bool __flush_work(struct work_struct *work)
+{
+ struct wq_barrier barr;
+
+ if (start_flush_work(work, &barr)) {
+ wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ return true;
+ } else {
+ return false;
+ }
+}
+
/**
* flush_work - wait for a work to finish executing the last queueing instance
* @work: the work to flush
@@ -2830,18 +2852,10 @@ already_gone:
*/
bool flush_work(struct work_struct *work)
{
- struct wq_barrier barr;
-
lock_map_acquire(&work->lockdep_map);
lock_map_release(&work->lockdep_map);
- if (start_flush_work(work, &barr)) {
- wait_for_completion(&barr.done);
- destroy_work_on_stack(&barr.work);
- return true;
- } else {
- return false;
- }
+ return __flush_work(work);
}
EXPORT_SYMBOL_GPL(flush_work);
@@ -3414,6 +3428,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
{
to->nice = from->nice;
cpumask_copy(to->cpumask, from->cpumask);
+ /*
+ * Unlike hash and equality test, this function doesn't ignore
+ * ->no_numa as it is used for both pool and wq attrs. Instead,
+ * get_unbound_pool() explicitly clears ->no_numa after copying.
+ */
+ to->no_numa = from->no_numa;
}
/* hash value of the content of @attr */
@@ -3581,6 +3601,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs);
+ /*
+ * no_numa isn't a worker_pool attribute, always clear it. See
+ * 'struct workqueue_attrs' comments for detail.
+ */
+ pool->attrs->no_numa = false;
+
/* if cpumask is contained inside a NUMA node, we belong to that node */
if (wq_numa_enabled) {
for_each_node(node) {
@@ -4759,7 +4785,14 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
schedule_work_on(cpu, &wfc.work);
- flush_work(&wfc.work);
+
+ /*
+ * The work item is on-stack and can't lead to deadlock through
+ * flushing. Use __flush_work() to avoid spurious lockdep warnings
+ * when work_on_cpu()s are nested.
+ */
+ __flush_work(&wfc.work);
+
return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);