author    Andrey Konovalov <andrey.konovalov@linaro.org>  2014-04-15 01:23:46 +0400
committer Andrey Konovalov <andrey.konovalov@linaro.org>  2014-04-15 01:23:46 +0400
commit    2c41c8eb2b3014ddb4de53fec533edfa47a96db3 (patch)
tree      2a11d9b2fbc72598589523742eac98af3a09d3ff /kernel
parent    dbb07139ef78699b07137e3974aa9638cfa2d57a (diff)
parent    2609fbf2f5158f57b1c41a5fa2c18fffc6016342 (diff)
Automatically merging tracking-linaro-android-3.14 into merge-linux-linaro-core-tracking
Conflicting files:
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                        34
-rw-r--r--  kernel/cpu.c                           20
-rw-r--r--  kernel/debug/debug_core.c              12
-rw-r--r--  kernel/debug/kdb/kdb_io.c              12
-rw-r--r--  kernel/fork.c                          19
-rw-r--r--  kernel/irq/pm.c                        12
-rw-r--r--  kernel/panic.c                          8
-rw-r--r--  kernel/power/Kconfig                   15
-rw-r--r--  kernel/power/Makefile                   3
-rw-r--r--  kernel/power/suspend.c                 15
-rw-r--r--  kernel/power/suspend_time.c           111
-rw-r--r--  kernel/power/wakelock.c                 7
-rw-r--r--  kernel/power/wakeup_reason.c          140
-rw-r--r--  kernel/printk/printk.c                  8
-rw-r--r--  kernel/sched/core.c                    32
-rw-r--r--  kernel/sys.c                          152
-rw-r--r--  kernel/sysctl.c                        17
-rw-r--r--  kernel/trace/Kconfig                    3
-rw-r--r--  kernel/trace/Makefile                   1
-rw-r--r--  kernel/trace/gpu-traces.c              23
-rw-r--r--  kernel/trace/trace.c                  104
-rw-r--r--  kernel/trace/trace.h                    2
-rw-r--r--  kernel/trace/trace_functions_graph.c   43
-rw-r--r--  kernel/trace/trace_output.c           182
-rw-r--r--  kernel/watchdog.c                     123
25 files changed, 1035 insertions, 63 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0c753ddd223..61e6358e740 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2110,6 +2110,25 @@ out_free_group_list:
return retval;
}
+static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+ int i;
+ int ret;
+
+ for_each_css(css, i, cgrp) {
+ if (css->ss->allow_attach) {
+ ret = css->ss->allow_attach(css, tset);
+ if (ret)
+ return ret;
+ } else {
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
@@ -2141,9 +2160,18 @@ retry_find_task:
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
- rcu_read_unlock();
- ret = -EACCES;
- goto out_unlock_cgroup;
+ /*
+ * if the default permission check fails, give each
+ * cgroup a chance to extend the permission check
+ */
+ struct cgroup_taskset tset = { };
+ tset.single.task = tsk;
+ tset.single.cgrp = cgrp;
+ ret = cgroup_allow_attach(cgrp, &tset);
+ if (ret) {
+ rcu_read_unlock();
+ goto out_unlock_cgroup;
+ }
}
} else
tsk = current;
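For context: the new ->allow_attach() hook lets an individual controller approve an attach that the default euid check above would reject. A minimal sketch of such a handler follows (hypothetical subsystem and policy, shown only for illustration; the real in-tree user added by this merge is cpu_cgroup_allow_attach() in kernel/sched/core.c further down this diff):

    /* Hypothetical example, not part of this merge. */
    static int foo_allow_attach(struct cgroup_subsys_state *css,
                                struct cgroup_taskset *tset)
    {
            struct task_struct *task;

            /* iterate the tasks being attached, as the cpu controller does */
            cgroup_taskset_for_each(task, css, tset) {
                    if (!capable(CAP_SYS_NICE))     /* illustrative policy */
                            return -EACCES;
            }
            return 0;
    }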
diff --git a/kernel/cpu.c b/kernel/cpu.c
index deff2e69376..2434ed358ee 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -720,3 +720,23 @@ void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_online_bits), src);
}
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+void idle_notifier_call_chain(unsigned long val)
+{
+ atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
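These exported helpers give drivers a generic idle notification chain; the arch idle loop is expected to call idle_notifier_call_chain() on idle entry and exit. A minimal consumer sketch (hypothetical driver; event values such as IDLE_START/IDLE_END come from the arch-side patches, not from this file):

    static int foo_idle_notify(struct notifier_block *nb,
                               unsigned long event, void *unused)
    {
            /* e.g. gate a device clock on idle entry, restore it on exit */
            return NOTIFY_OK;
    }

    static struct notifier_block foo_idle_nb = {
            .notifier_call = foo_idle_notify,
    };

    /* in driver init: */
    idle_notifier_register(&foo_idle_nb);

Note the chain is atomic, so callbacks run in the idle path and must not sleep.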
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 334b3980ffc..61d1d14b762 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -86,6 +86,10 @@ static int kgdb_use_con;
bool dbg_is_early = true;
/* Next cpu to become the master debug core */
int dbg_switch_cpu;
+/* Flag for entering kdb when a panic occurs */
+static bool break_on_panic = true;
+/* Flag for entering kdb when an exception occurs */
+static bool break_on_exception = true;
/* Use kdb or gdbserver mode */
int dbg_kdb_mode = 1;
@@ -100,6 +104,8 @@ early_param("kgdbcon", opt_kgdb_con);
module_param(kgdb_use_con, int, 0644);
module_param(kgdbreboot, int, 0644);
+module_param(break_on_panic, bool, 0644);
+module_param(break_on_exception, bool, 0644);
/*
* Holds information about breakpoints in a kernel. These breakpoints are
@@ -682,6 +688,9 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
if (arch_kgdb_ops.enable_nmi)
arch_kgdb_ops.enable_nmi(0);
+ if (unlikely(signo != SIGTRAP && !break_on_exception))
+ return 1;
+
memset(ks, 0, sizeof(struct kgdb_state));
ks->cpu = raw_smp_processor_id();
ks->ex_vector = evector;
@@ -813,6 +822,9 @@ static int kgdb_panic_event(struct notifier_block *self,
unsigned long val,
void *data)
{
+ if (!break_on_panic)
+ return NOTIFY_DONE;
+
if (dbg_kdb_mode)
kdb_printf("PANIC: %s\n", (char *)data);
kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 14ff4849262..4b0fb2fb779 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
int i;
int diag, dtab_count;
int key;
-
+ static int last_crlf;
diag = kdbgetintenv("DTABCOUNT", &dtab_count);
if (diag)
@@ -237,6 +237,9 @@ poll_again:
return buffer;
if (key != 9)
tab = 0;
+ if (key != 10 && key != 13)
+ last_crlf = 0;
+
switch (key) {
case 8: /* backspace */
if (cp > buffer) {
@@ -254,7 +257,12 @@ poll_again:
*cp = tmp;
}
break;
- case 13: /* enter */
+ case 10: /* new line */
+ case 13: /* carriage return */
+ /* handle \n after \r */
+ if (last_crlf && last_crlf != key)
+ break;
+ last_crlf = key;
*lastchar++ = '\n';
*lastchar++ = '\0';
if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/fork.c b/kernel/fork.c
index a17621c6cd4..cc081f25626 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -198,6 +198,9 @@ struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
static void account_kernel_stack(struct thread_info *ti, int account)
{
struct zone *zone = page_zone(virt_to_page(ti));
@@ -231,6 +234,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
free_signal_struct(sig);
}
+int task_free_register(struct notifier_block *n)
+{
+ return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+int task_free_unregister(struct notifier_block *n)
+{
+ return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -242,6 +257,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
+ atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
if (!profile_handoff_task(tsk))
free_task(tsk);
}
@@ -690,7 +706,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
+ !ptrace_may_access(task, mode) &&
+ !capable(CAP_SYS_RESOURCE)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
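The new chain fires from __put_task_struct() with the dying task as the notifier data, letting a driver drop per-task state at the final put. A consumer sketch (hypothetical names; the chain is atomic, so the callback must not sleep):

    static int foo_task_free(struct notifier_block *nb,
                             unsigned long unused, void *data)
    {
            struct task_struct *tsk = data;

            /* release any per-task bookkeeping keyed on tsk */
            return NOTIFY_OK;
    }

    static struct notifier_block foo_task_free_nb = {
            .notifier_call = foo_task_free,
    };

    /* at init: */
    task_free_register(&foo_task_free_nb);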
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index abcd6ca86cb..c72b7a43beb 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -103,14 +103,14 @@ int check_wakeup_irqs(void)
int irq;
for_each_irq_desc(irq, desc) {
- /*
- * Only interrupts which are marked as wakeup source
- * and have not been disabled before the suspend check
- * can abort suspend.
- */
if (irqd_is_wakeup_set(&desc->irq_data)) {
- if (desc->depth == 1 && desc->istate & IRQS_PENDING)
+ if (desc->istate & IRQS_PENDING) {
+ pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
+ irq,
+ desc->action && desc->action->name ?
+ desc->action->name : "");
return -EBUSY;
+ }
continue;
}
/*
diff --git a/kernel/panic.c b/kernel/panic.c
index 6d630037509..7b8d5d257fb 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,6 +27,9 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
+/* Machine specific panic information string */
+char *mach_panic_string;
+
int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
@@ -380,6 +383,11 @@ late_initcall(init_oops_id);
void print_oops_end_marker(void)
{
init_oops_id();
+
+ if (mach_panic_string)
+ printk(KERN_WARNING "Board Information: %s\n",
+ mach_panic_string);
+
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
(unsigned long long)oops_id);
}
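mach_panic_string is meant to be filled in by machine-specific code so every oops carries a board identifier. A sketch of a setter (hypothetical board file, for illustration only):

    extern char *mach_panic_string;

    static int __init foo_board_panic_init(void)
    {
            /* emitted by print_oops_end_marker() as "Board Information: ..." */
            mach_panic_string = "foo-board rev B";
            return 0;
    }
    early_initcall(foo_board_panic_init);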
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 2fac9cc79b3..6c78fe5ee69 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,6 +18,14 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
+config HAS_WAKELOCK
+ bool
+ default y
+
+config WAKELOCK
+ bool
+ default y
+
config HIBERNATE_CALLBACKS
bool
@@ -309,3 +317,10 @@ config PM_GENERIC_DOMAINS_RUNTIME
config CPU_PM
bool
depends on SUSPEND || CPU_IDLE
+
+config SUSPEND_TIME
+ bool "Log time spent in suspend"
+ ---help---
+ Prints the time spent in suspend in the kernel log, and
+ keeps statistics on the time spent in suspend in
+ /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11e..74c713ba61b 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -11,5 +11,8 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
+obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
+
+obj-$(CONFIG_SUSPEND) += wakeup_reason.o
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 62ee437b5c7..4f1e813a2a5 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -25,6 +25,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/ftrace.h>
+#include <linux/rtc.h>
#include <trace/events/power.h>
#include "power.h"
@@ -358,6 +359,18 @@ static int enter_state(suspend_state_t state)
return error;
}
+static void pm_suspend_marker(char *annotation)
+{
+ struct timespec ts;
+ struct rtc_time tm;
+
+ getnstimeofday(&ts);
+ rtc_time_to_tm(ts.tv_sec, &tm);
+ pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
+ annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
+}
+
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -372,6 +385,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
+ pm_suspend_marker("entry");
error = enter_state(state);
if (error) {
suspend_stats.fail++;
@@ -379,6 +393,7 @@ int pm_suspend(suspend_state_t state)
} else {
suspend_stats.success++;
}
+ pm_suspend_marker("exit");
return error;
}
EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 00000000000..d2a65da9f22
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/syscore_ops.h>
+#include <linux/time.h>
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+ int bin;
+ seq_printf(s, "time (secs) count\n");
+ seq_printf(s, "------------------\n");
+ for (bin = 0; bin < 32; bin++) {
+ if (time_in_suspend_bins[bin] == 0)
+ continue;
+ seq_printf(s, "%4d - %4d %4u\n",
+ bin ? 1 << (bin - 1) : 0, 1 << bin,
+ time_in_suspend_bins[bin]);
+ }
+ return 0;
+}
+
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, suspend_time_debug_show, NULL);
+}
+
+static const struct file_operations suspend_time_debug_fops = {
+ .open = suspend_time_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init suspend_time_debug_init(void)
+{
+ struct dentry *d;
+
+ d = debugfs_create_file("suspend_time", 0755, NULL, NULL,
+ &suspend_time_debug_fops);
+ if (!d) {
+ pr_err("Failed to create suspend_time debug file\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+static int suspend_time_syscore_suspend(void)
+{
+ read_persistent_clock(&suspend_time_before);
+
+ return 0;
+}
+
+static void suspend_time_syscore_resume(void)
+{
+ struct timespec after;
+
+ read_persistent_clock(&after);
+
+ after = timespec_sub(after, suspend_time_before);
+
+ time_in_suspend_bins[fls(after.tv_sec)]++;
+
+ pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+ after.tv_nsec / NSEC_PER_MSEC);
+}
+
+static struct syscore_ops suspend_time_syscore_ops = {
+ .suspend = suspend_time_syscore_suspend,
+ .resume = suspend_time_syscore_resume,
+};
+
+static int suspend_time_syscore_init(void)
+{
+ register_syscore_ops(&suspend_time_syscore_ops);
+
+ return 0;
+}
+
+static void suspend_time_syscore_exit(void)
+{
+ unregister_syscore_ops(&suspend_time_syscore_ops);
+}
+module_init(suspend_time_syscore_init);
+module_exit(suspend_time_syscore_exit);
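The histogram bins by fls() of the suspend duration in seconds: bin n counts suspends lasting between 2^(n-1) and 2^n seconds, with bin 0 holding sub-second suspends. For example, a 75-second suspend has fls(75) = 7 and increments the row printed as "  64 -  128". With debugfs mounted in the usual place, the table is read from /sys/kernel/debug/suspend_time, matching the path given in the Kconfig help text above.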
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 8f50de394d2..c8fba338007 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -9,7 +9,6 @@
* manipulate wakelocks on Android.
*/
-#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -189,9 +188,6 @@ int pm_wake_lock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
while (*str && !isspace(*str))
str++;
@@ -235,9 +231,6 @@ int pm_wake_unlock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
len = strlen(buf);
if (!len)
return -EINVAL;
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 00000000000..188a6bfacf5
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,140 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ int irq_no, buf_offset = 0;
+ struct irq_desc *desc;
+ spin_lock(&resume_reason_lock);
+ for (irq_no = 0; irq_no < irqcount; irq_no++) {
+ desc = irq_to_desc(irq_list[irq_no]);
+ if (desc && desc->action && desc->action->name)
+ buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+ irq_list[irq_no], desc->action->name);
+ else
+ buf_offset += sprintf(buf + buf_offset, "%d\n",
+ irq_list[irq_no]);
+ }
+ spin_unlock(&resume_reason_lock);
+ return buf_offset;
+}
+
+static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666,
+ reason_show, NULL);
+
+static struct attribute *attrs[] = {
+ &resume_reason.attr,
+ NULL,
+};
+static struct attribute_group attr_group = {
+ .attrs = attrs,
+};
+
+/*
+ * Logs the IRQ that woke the system to the kernel log and
+ * stores it for exposure to userspace via sysfs.
+ */
+void log_wakeup_reason(int irq)
+{
+ struct irq_desc *desc;
+ desc = irq_to_desc(irq);
+ if (desc && desc->action && desc->action->name)
+ printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+ desc->action->name);
+ else
+ printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+ spin_lock(&resume_reason_lock);
+ if (irqcount == MAX_WAKEUP_REASON_IRQS) {
+ spin_unlock(&resume_reason_lock);
+ printk(KERN_WARNING "Resume caused by more than %d IRQs\n",
+ MAX_WAKEUP_REASON_IRQS);
+ return;
+ }
+
+ irq_list[irqcount++] = irq;
+ spin_unlock(&resume_reason_lock);
+}
+
+/* Detects a suspend and clears all the previous wakeup reasons */
+static int wakeup_reason_pm_event(struct notifier_block *notifier,
+ unsigned long pm_event, void *unused)
+{
+ switch (pm_event) {
+ case PM_SUSPEND_PREPARE:
+ spin_lock(&resume_reason_lock);
+ irqcount = 0;
+ spin_unlock(&resume_reason_lock);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block wakeup_reason_pm_notifier_block = {
+ .notifier_call = wakeup_reason_pm_event,
+};
+
+/*
+ * Initializes the sysfs attributes and
+ * registers the pm_event notifier.
+ */
+int __init wakeup_reason_init(void)
+{
+ int retval;
+ spin_lock_init(&resume_reason_lock);
+ retval = register_pm_notifier(&wakeup_reason_pm_notifier_block);
+ if (retval)
+ printk(KERN_WARNING "[%s] failed to register PM notifier %d\n",
+ __func__, retval);
+
+ wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj);
+ if (!wakeup_reason) {
+ printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n",
+ __func__);
+ return 1;
+ }
+ retval = sysfs_create_group(wakeup_reason, &attr_group);
+ if (retval) {
+ kobject_put(wakeup_reason);
+ printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n",
+ __func__, retval);
+ }
+ return 0;
+}
+
+late_initcall(wakeup_reason_init);
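Once registered, the attribute appears as /sys/kernel/wakeup_reasons/last_resume_reason (the kobject is created under kernel_kobj). Each read returns one line per wakeup IRQ in "<irq> <action name>" form as built by reason_show(), e.g. a hypothetical "39 msmgpio"; the list is cleared again on the next PM_SUSPEND_PREPARE.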
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 4dae9cbe925..8dbd28cf831 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -54,6 +54,10 @@
#include "console_cmdline.h"
#include "braille.h"
+#ifdef CONFIG_EARLY_PRINTK_DIRECT
+extern void printascii(char *);
+#endif
+
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
@@ -1568,6 +1572,10 @@ asmlinkage int vprintk_emit(int facility, int level,
}
}
+#ifdef CONFIG_EARLY_PRINTK_DIRECT
+ printascii(text);
+#endif
+
if (level == -1)
level = default_message_loglevel;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f5c6635b806..a55b70c9830 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6942,13 +6942,24 @@ static inline int preempt_count_equals(int preempt_offset)
return (nested == preempt_offset);
}
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+ __might_sleep_init_called = 1;
+ return 0;
+}
+early_initcall(__might_sleep_init);
+
void __might_sleep(const char *file, int line, int preempt_offset)
{
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ oops_in_progress)
+ return;
+ if (system_state != SYSTEM_RUNNING &&
+ (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
@@ -7608,6 +7619,24 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
sched_offline_group(tg);
}
+static int cpu_cgroup_allow_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
+{
+ const struct cred *cred = current_cred(), *tcred;
+ struct task_struct *task;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ tcred = __task_cred(task);
+
+ if ((current != task) && !capable(CAP_SYS_NICE) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid))
+ return -EACCES;
+ }
+
+ return 0;
+}
+
static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
@@ -7978,6 +8007,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.css_offline = cpu_cgroup_css_offline,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
+ .allow_attach = cpu_cgroup_allow_attach,
.exit = cpu_cgroup_exit,
.subsys_id = cpu_cgroup_subsys_id,
.base_cftypes = cpu_files,
diff --git a/kernel/sys.c b/kernel/sys.c
index c0a58be780a..d0e9efaea8e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -41,6 +41,8 @@
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -1833,6 +1835,153 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+#ifdef CONFIG_MMU
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ const char __user *name_addr)
+{
+ struct mm_struct * mm = vma->vm_mm;
+ int error = 0;
+ pgoff_t pgoff;
+
+ if (name_addr == vma_get_anon_name(vma)) {
+ *prev = vma;
+ goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma),
+ name_addr);
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;
+
+ if (start != vma->vm_start) {
+ error = split_vma(mm, vma, start, 1);
+ if (error)
+ goto out;
+ }
+
+ if (end != vma->vm_end) {
+ error = split_vma(mm, vma, end, 0);
+ if (error)
+ goto out;
+ }
+
+success:
+ if (!vma->vm_file)
+ vma->shared.anon_name = name_addr;
+
+out:
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+ unsigned long arg)
+{
+ unsigned long tmp;
+ struct vm_area_struct * vma, *prev;
+ int unmapped_error = 0;
+ int error = -EINVAL;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ * - this matches the handling in madvise.
+ */
+ vma = find_vma_prev(current->mm, start, &prev);
+ if (vma && start > vma->vm_start)
+ prev = vma;
+
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ return error;
+
+ /* Here start < (end|vma->vm_end). */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ if (start >= end)
+ return error;
+ }
+
+ /* Here vma->vm_start <= start < (end|vma->vm_end) */
+ tmp = vma->vm_end;
+ if (end < tmp)
+ tmp = end;
+
+ /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+ error = prctl_update_vma_anon_name(vma, &prev, start, end,
+ (const char __user *)arg);
+ if (error)
+ return error;
+ start = tmp;
+ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ error = unmapped_error;
+ if (start >= end)
+ return error;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_sem */
+ vma = find_vma(current->mm, start);
+ }
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ struct mm_struct *mm = current->mm;
+ int error;
+ unsigned long len;
+ unsigned long end;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+ /* Check to see whether len was rounded up from small -ve to zero */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ switch (opt) {
+ case PR_SET_VMA_ANON_NAME:
+ error = prctl_set_vma_anon_name(start, end, arg);
+ break;
+ default:
+ error = -EINVAL;
+ }
+
+ up_write(&mm->mmap_sem);
+
+ return error;
+}
+#else /* CONFIG_MMU */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ return -EINVAL;
+}
+#endif
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -1996,6 +2145,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return current->no_new_privs ? 1 : 0;
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
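The PR_SET_VMA option names otherwise-anonymous mappings. The request constants live in the companion uapi <linux/prctl.h> change, which is outside this kernel/-limited diffstat; the literal values below match Android's definitions but are stated here as assumptions. A userspace sketch:

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/prctl.h>

    /* Values from the companion uapi patch (assumed, not shown in this diff). */
    #ifndef PR_SET_VMA
    #define PR_SET_VMA              0x53564d41
    #define PR_SET_VMA_ANON_NAME    0
    #endif

    int main(void)
    {
            size_t len = 4 * 4096;
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED)
                    return 1;

            /*
             * The kernel stores the user pointer itself (vma->shared.anon_name),
             * so the string must stay mapped for the life of the mapping.
             */
            if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
                      (unsigned long)p, len, (unsigned long)"my-pool"))
                    perror("prctl(PR_SET_VMA)");
            return 0;
    }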
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 49e13e1f8fe..797bfa049be 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -104,6 +104,8 @@ extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
+extern int extra_free_kbytes;
+extern int min_free_order_shift;
extern int pid_max_min, pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
@@ -1313,6 +1315,21 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
},
{
+ .procname = "extra_free_kbytes",
+ .data = &extra_free_kbytes,
+ .maxlen = sizeof(extra_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "min_free_order_shift",
+ .data = &min_free_order_shift,
+ .maxlen = sizeof(min_free_order_shift),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.procname = "percpu_pagelist_fraction",
.data = &percpu_pagelist_fraction,
.maxlen = sizeof(percpu_pagelist_fraction),
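Both knobs surface under /proc/sys/vm/: writes to vm.extra_free_kbytes go through min_free_kbytes_sysctl_handler and therefore recompute the zone watermarks immediately, while vm.min_free_order_shift is handled as a plain integer. The backing variables are added by the matching mm/page_alloc.c change, which falls outside this kernel/-limited diffstat.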
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 015f85aaca0..e24c188cbbc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,9 @@ config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
+config GPU_TRACEPOINTS
+ bool
+
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 1378e84fbe3..30013ca8ba3 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -61,5 +61,6 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c
new file mode 100644
index 00000000000..a4b3f00faee
--- /dev/null
+++ b/kernel/trace/gpu-traces.c
@@ -0,0 +1,23 @@
+/*
+ * GPU tracepoints
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gpu.h>
+
+EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch);
+EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 24c1f238255..f03bde8a44b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -786,6 +786,7 @@ static const char *trace_options[] = {
"irq-info",
"markers",
"function-trace",
+ "print-tgid",
NULL
};
@@ -1298,6 +1299,7 @@ void tracing_reset_all_online_cpus(void)
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
+static unsigned saved_tgids[SAVED_CMDLINES];
static int cmdline_idx;
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1486,6 +1488,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
}
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ saved_tgids[idx] = tsk->tgid;
arch_spin_unlock(&trace_cmdline_lock);
}
@@ -1521,6 +1524,25 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
+int trace_find_tgid(int pid)
+{
+ unsigned map;
+ int tgid;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ map = map_pid_to_cmdline[pid];
+ if (map != NO_CMDLINE_MAP)
+ tgid = saved_tgids[map];
+ else
+ tgid = -1;
+
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+
+ return tgid;
+}
+
void tracing_record_cmdline(struct task_struct *tsk)
{
if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
@@ -2485,6 +2507,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
seq_puts(m, "# | | | | |\n");
}
+static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | | |\n");
+}
+
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
print_event_info(buf, m);
@@ -2497,6 +2526,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
seq_puts(m, "# | | | |||| | |\n");
}
+static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# _-----=> irqs-off\n");
+ seq_puts(m, "# / _----=> need-resched\n");
+ seq_puts(m, "# | / _---=> hardirq/softirq\n");
+ seq_puts(m, "# || / _--=> preempt-depth\n");
+ seq_puts(m, "# ||| / delay\n");
+ seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |||| | |\n");
+}
+
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
@@ -2797,9 +2838,15 @@ void trace_default_header(struct seq_file *m)
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
- print_func_help_header_irq(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_irq_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header_irq(iter->trace_buffer, m);
else
- print_func_help_header(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header(iter->trace_buffer, m);
}
}
}
@@ -3700,9 +3747,53 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
}
static const struct file_operations tracing_saved_cmdlines_fops = {
- .open = tracing_open_generic,
- .read = tracing_saved_cmdlines_read,
- .llseek = generic_file_llseek,
+ .open = tracing_open_generic,
+ .read = tracing_saved_cmdlines_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t
+tracing_saved_tgids_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char *file_buf;
+ char *buf;
+ int len = 0;
+ int pid;
+ int i;
+
+ file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL);
+ if (!file_buf)
+ return -ENOMEM;
+
+ buf = file_buf;
+
+ for (i = 0; i < SAVED_CMDLINES; i++) {
+ int tgid;
+ int r;
+
+ pid = map_cmdline_to_pid[i];
+ if (pid == -1 || pid == NO_CMDLINE_MAP)
+ continue;
+
+ tgid = trace_find_tgid(pid);
+ r = sprintf(buf, "%d %d\n", pid, tgid);
+ buf += r;
+ len += r;
+ }
+
+ len = simple_read_from_buffer(ubuf, cnt, ppos,
+ file_buf, len);
+
+ kfree(file_buf);
+
+ return len;
+}
+
+static const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_saved_tgids_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -6231,6 +6322,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);
+ trace_create_file("saved_tgids", 0444, d_tracer,
+ tr, &tracing_saved_tgids_fops);
+
trace_create_file("trace_clock", 0644, d_tracer, tr,
&trace_clock_fops);
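With these pieces in place, echoing 1 to options/print-tgid in the tracing directory (the standard naming for entries in trace_options[]) switches on the TGID column rendered by the *_tgid header helpers above and by trace_print_context() in trace_output.c, and the new per-instance saved_tgids file returns one "<pid> <tgid>" pair per cached comm, as formatted by tracing_saved_tgids_read().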
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 02b592f2d4b..2c72f3d558e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -645,6 +645,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern cycle_t ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
@@ -891,6 +892,7 @@ enum trace_iterator_flags {
TRACE_ITER_IRQ_INFO = 0x800000,
TRACE_ITER_MARKERS = 0x1000000,
TRACE_ITER_FUNCTION = 0x2000000,
+ TRACE_ITER_TGID = 0x4000000,
};
/*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 0b99120d395..4c2daacd8a2 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -46,6 +46,8 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
#define TRACE_GRAPH_PRINT_IRQS 0x40
+#define TRACE_GRAPH_PRINT_FLAT 0x80
+
static unsigned int max_depth;
@@ -64,6 +66,8 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+ /* Use standard trace formatting rather than hierarchical */
+ { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) },
{ } /* Empty entry */
};
@@ -1284,6 +1288,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
int cpu = iter->cpu;
int ret;
+ if (flags & TRACE_GRAPH_PRINT_FLAT)
+ return TRACE_TYPE_UNHANDLED;
+
if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
return TRACE_TYPE_HANDLED;
@@ -1341,13 +1348,6 @@ print_graph_function(struct trace_iterator *iter)
return print_graph_function_flags(iter, tracer_flags.val);
}
-static enum print_line_t
-print_graph_function_event(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- return print_graph_function(iter);
-}
-
static void print_lat_header(struct seq_file *s, u32 flags)
{
static const char spaces[] = " " /* 16 spaces */
@@ -1414,6 +1414,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
{
struct trace_iterator *iter = s->private;
+ if (flags & TRACE_GRAPH_PRINT_FLAT) {
+ trace_default_header(s);
+ return;
+ }
+
if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
return;
@@ -1484,20 +1489,6 @@ static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
return 0;
}
-static struct trace_event_functions graph_functions = {
- .trace = print_graph_function_event,
-};
-
-static struct trace_event graph_trace_entry_event = {
- .type = TRACE_GRAPH_ENT,
- .funcs = &graph_functions,
-};
-
-static struct trace_event graph_trace_ret_event = {
- .type = TRACE_GRAPH_RET,
- .funcs = &graph_functions
-};
-
static struct tracer graph_trace __tracer_data = {
.name = "function_graph",
.open = graph_trace_open,
@@ -1573,16 +1564,6 @@ static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
- if (!register_ftrace_event(&graph_trace_entry_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
- if (!register_ftrace_event(&graph_trace_ret_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
return register_tracer(&graph_trace);
}
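funcgraph-flat is a tracer option, so with the function_graph tracer active it is enabled via echo funcgraph-flat > trace_options (equivalently, echo 1 > options/funcgraph-flat). print_graph_function_flags() then returns TRACE_TYPE_UNHANDLED and output falls through to the generic TRACE_GRAPH_ENT/TRACE_GRAPH_RET events that this merge registers in kernel/trace/trace_output.c, yielding the standard one-event-per-line format instead of the hierarchical call graph.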
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ed32284fbe3..1083b614ce4 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -717,11 +717,25 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long secs, usec_rem;
char comm[TASK_COMM_LEN];
int ret;
+ int tgid;
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
- comm, entry->pid, iter->cpu);
+ ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ if (!ret)
+ return 0;
+
+ if (trace_flags & TRACE_ITER_TGID) {
+ tgid = trace_find_tgid(entry->pid);
+ if (tgid < 0)
+ ret = trace_seq_puts(s, "(-----) ");
+ else
+ ret = trace_seq_printf(s, "(%5d) ", tgid);
+ if (!ret)
+ return 0;
+ }
+
+ ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
if (!ret)
return 0;
@@ -1050,6 +1064,168 @@ static struct trace_event trace_fn_event = {
.funcs = &trace_fn_funcs,
};
+/* TRACE_GRAPH_ENT */
+static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_puts(s, "graph_ent: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->graph_ent.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %d\n",
+ field->graph_ent.func,
+ field->graph_ent.depth))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ent_funcs = {
+ .trace = trace_graph_ent_trace,
+ .raw = trace_graph_ent_raw,
+ .hex = trace_graph_ent_hex,
+ .binary = trace_graph_ent_bin,
+};
+
+static struct trace_event trace_graph_ent_event = {
+ .type = TRACE_GRAPH_ENT,
+ .funcs = &trace_graph_ent_funcs,
+};
+
+/* TRACE_GRAPH_RET */
+static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct ftrace_graph_ret_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!trace_seq_puts(s, "graph_ret: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->ret.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n",
+ field->ret.func,
+ field->ret.calltime,
+ field->ret.rettime,
+ field->ret.overrun,
+ field->ret.depth))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ret_funcs = {
+ .trace = trace_graph_ret_trace,
+ .raw = trace_graph_ret_raw,
+ .hex = trace_graph_ret_hex,
+ .binary = trace_graph_ret_bin,
+};
+
+static struct trace_event trace_graph_ret_event = {
+ .type = TRACE_GRAPH_RET,
+ .funcs = &trace_graph_ret_funcs,
+};
+
/* TRACE_CTX an TRACE_WAKE */
static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
char *delim)
@@ -1440,6 +1616,8 @@ static struct trace_event trace_print_event = {
static struct trace_event *events[] __initdata = {
&trace_fn_event,
+ &trace_graph_ent_event,
+ &trace_graph_ret_event,
&trace_ctx_event,
&trace_wake_event,
&trace_stack_event,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4431610f049..68fbd3f169f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -45,6 +45,11 @@ static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static cpumask_t __read_mostly watchdog_cpus;
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
@@ -178,7 +183,7 @@ void touch_softlockup_watchdog_sync(void)
__raw_get_cpu_var(watchdog_touch_ts) = 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/* watchdog detector functions */
static int is_hardlockup(void)
{
@@ -192,6 +197,76 @@ static int is_hardlockup(void)
}
#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+ cpumask_t cpus = watchdog_cpus;
+ unsigned int next_cpu;
+
+ next_cpu = cpumask_next(cpu, &cpus);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(&cpus);
+
+ if (next_cpu == cpu)
+ return nr_cpu_ids;
+
+ return next_cpu;
+}
+
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{
+ unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+ if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+ return 1;
+
+ per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+ return 0;
+}
+
+static void watchdog_check_hardlockup_other_cpu(void)
+{
+ unsigned int next_cpu;
+
+ /*
+ * Test for hardlockups every 3 samples. The sample period is
+ * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+ * watchdog_thresh (over by 20%).
+ */
+ if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+ return;
+
+ /* check for a hardlockup on the next cpu */
+ next_cpu = watchdog_next_cpu(smp_processor_id());
+ if (next_cpu >= nr_cpu_ids)
+ return;
+
+ smp_rmb();
+
+ if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
+ per_cpu(watchdog_nmi_touch, next_cpu) = false;
+ return;
+ }
+
+ if (is_hardlockup_other_cpu(next_cpu)) {
+ /* only warn once */
+ if (per_cpu(hard_watchdog_warn, next_cpu) == true)
+ return;
+
+ if (hardlockup_panic)
+ panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+ else
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+
+ per_cpu(hard_watchdog_warn, next_cpu) = true;
+ } else {
+ per_cpu(hard_watchdog_warn, next_cpu) = false;
+ }
+}
+#else
+static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
+#endif
+
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -203,7 +278,7 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
@@ -251,7 +326,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
__this_cpu_write(hard_watchdog_warn, false);
return;
}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
static void watchdog_interrupt_count(void)
{
@@ -271,6 +346,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
/* kick the hardlockup detector */
watchdog_interrupt_count();
+ /* test for hardlockups on the next cpu */
+ watchdog_check_hardlockup_other_cpu();
+
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));
@@ -395,7 +473,7 @@ static void watchdog(unsigned int cpu)
__touch_watchdog();
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
@@ -471,9 +549,44 @@ static void watchdog_nmi_disable(unsigned int cpu)
return;
}
#else
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static int watchdog_nmi_enable(unsigned int cpu)
+{
+ /*
+ * The new cpu will be marked online before the first hrtimer interrupt
+ * runs on it. If another cpu tests for a hardlockup on the new cpu
+ * before it has run its first hrtimer, it will get a false positive.
+ * Touch the watchdog on the new cpu to delay the first check for at
+ * least 3 sampling periods to guarantee one hrtimer has run on the new
+ * cpu.
+ */
+ per_cpu(watchdog_nmi_touch, cpu) = true;
+ smp_wmb();
+ cpumask_set_cpu(cpu, &watchdog_cpus);
+ return 0;
+}
+
+static void watchdog_nmi_disable(unsigned int cpu)
+{
+ unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+ /*
+ * Offlining this cpu will cause the cpu before this one to start
+ * checking the one after this one. If this cpu just finished checking
+ * the next cpu and updating hrtimer_interrupts_saved, and then the
+ * previous cpu checks it within one sample period, it will trigger a
+ * false positive. Touch the watchdog on the next cpu to prevent it.
+ */
+ if (next_cpu < nr_cpu_ids)
+ per_cpu(watchdog_nmi_touch, next_cpu) = true;
+ smp_wmb();
+ cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
static struct smp_hotplug_thread watchdog_threads = {
.store = &softlockup_watchdog,