aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/Makefile2
-rw-r--r--kernel/events/callchain.c191
-rw-r--r--kernel/events/core.c296
-rw-r--r--kernel/events/internal.h39
-rw-r--r--kernel/jump_label.c49
-rw-r--r--kernel/lockdep.c30
-rw-r--r--kernel/trace/ftrace.c715
-rw-r--r--kernel/trace/trace.c105
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_events_filter.c283
-rw-r--r--kernel/trace/trace_irqsoff.c13
-rw-r--r--kernel/trace/trace_output.c16
-rw-r--r--kernel/trace/trace_sched_wakeup.c13
-rw-r--r--kernel/trace/trace_stack.c30
14 files changed, 1146 insertions, 638 deletions
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 89e5e8aa4c3..22d901f9caf 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_core.o = -pg
endif
-obj-y := core.o ring_buffer.o
+obj-y := core.o ring_buffer.o callchain.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
new file mode 100644
index 00000000000..057e24b665c
--- /dev/null
+++ b/kernel/events/callchain.c
@@ -0,0 +1,191 @@
+/*
+ * Performance events callchain code, extracted from core.c:
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct callchain_cpus_entries {
+ struct rcu_head rcu_head;
+ struct perf_callchain_entry *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+static struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+ struct callchain_cpus_entries *entries;
+ int cpu;
+
+ entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+
+ kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+ struct callchain_cpus_entries *entries;
+
+ entries = callchain_cpus_entries;
+ rcu_assign_pointer(callchain_cpus_entries, NULL);
+ call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+ int cpu;
+ int size;
+ struct callchain_cpus_entries *entries;
+
+ /*
+ * We can't use the percpu allocation API for data that can be
+ * accessed from NMI. Use a temporary manual per cpu allocation
+ * until that gets sorted out.
+ */
+ size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
+
+ entries = kzalloc(size, GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+ for_each_possible_cpu(cpu) {
+ entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!entries->cpu_entries[cpu])
+ goto fail;
+ }
+
+ rcu_assign_pointer(callchain_cpus_entries, entries);
+
+ return 0;
+
+fail:
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+ kfree(entries);
+
+ return -ENOMEM;
+}
+
+int get_callchain_buffers(void)
+{
+ int err = 0;
+ int count;
+
+ mutex_lock(&callchain_mutex);
+
+ count = atomic_inc_return(&nr_callchain_events);
+ if (WARN_ON_ONCE(count < 1)) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (count > 1) {
+ /* If the allocation failed, give up */
+ if (!callchain_cpus_entries)
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = alloc_callchain_buffers();
+ if (err)
+ release_callchain_buffers();
+exit:
+ mutex_unlock(&callchain_mutex);
+
+ return err;
+}
+
+void put_callchain_buffers(void)
+{
+ if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+ release_callchain_buffers();
+ mutex_unlock(&callchain_mutex);
+ }
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+ int cpu;
+ struct callchain_cpus_entries *entries;
+
+ *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+ if (*rctx == -1)
+ return NULL;
+
+ entries = rcu_dereference(callchain_cpus_entries);
+ if (!entries)
+ return NULL;
+
+ cpu = smp_processor_id();
+
+ return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+ put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ int rctx;
+ struct perf_callchain_entry *entry;
+
+
+ entry = get_callchain_entry(&rctx);
+ if (rctx == -1)
+ return NULL;
+
+ if (!entry)
+ goto exit_put;
+
+ entry->nr = 0;
+
+ if (!user_mode(regs)) {
+ perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+ perf_callchain_kernel(entry, regs);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+
+ if (regs) {
+ perf_callchain_store(entry, PERF_CONTEXT_USER);
+ perf_callchain_user(entry, regs);
+ }
+
+exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 58690af323e..24e3a4b1c89 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -128,7 +128,7 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
-struct jump_label_key perf_sched_events __read_mostly;
+struct jump_label_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu--;
ctx->nr_active--;
+ if (event->attr.freq && event->attr.sample_freq)
+ ctx->nr_freq--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
}
@@ -1325,6 +1327,7 @@ retry:
}
raw_spin_unlock_irq(&ctx->lock);
}
+EXPORT_SYMBOL_GPL(perf_event_disable);
static void perf_set_shadow_time(struct perf_event *event,
struct perf_event_context *ctx,
@@ -1406,6 +1409,8 @@ event_sched_in(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
+ if (event->attr.freq && event->attr.sample_freq)
+ ctx->nr_freq++;
if (event->attr.exclusive)
cpuctx->exclusive = 1;
@@ -1662,8 +1667,7 @@ retry:
* Note: this works for group members as well as group leaders
* since the non-leader members' sibling_lists will be empty.
*/
-static void __perf_event_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
+static void __perf_event_mark_enabled(struct perf_event *event)
{
struct perf_event *sub;
u64 tstamp = perf_event_time(event);
@@ -1701,7 +1705,7 @@ static int __perf_event_enable(void *info)
*/
perf_cgroup_set_timestamp(current, ctx);
- __perf_event_mark_enabled(event, ctx);
+ __perf_event_mark_enabled(event);
if (!event_filter_match(event)) {
if (is_cgroup_event(event))
@@ -1782,7 +1786,7 @@ void perf_event_enable(struct perf_event *event)
retry:
if (!ctx->is_active) {
- __perf_event_mark_enabled(event, ctx);
+ __perf_event_mark_enabled(event);
goto out;
}
@@ -1809,6 +1813,7 @@ retry:
out:
raw_spin_unlock_irq(&ctx->lock);
}
+EXPORT_SYMBOL_GPL(perf_event_enable);
int perf_event_refresh(struct perf_event *event, int refresh)
{
@@ -2327,6 +2332,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
u64 interrupts, now;
s64 delta;
+ if (!ctx->nr_freq)
+ return;
+
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -2382,12 +2390,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
- int rotate = 0, remove = 1;
+ int rotate = 0, remove = 1, freq = 0;
if (cpuctx->ctx.nr_events) {
remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
+ if (cpuctx->ctx.nr_freq)
+ freq = 1;
}
ctx = cpuctx->task_ctx;
@@ -2395,33 +2405,40 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
+ if (ctx->nr_freq)
+ freq = 1;
}
+ if (!rotate && !freq)
+ goto done;
+
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(cpuctx->ctx.pmu);
- perf_ctx_adjust_freq(&cpuctx->ctx, interval);
- if (ctx)
- perf_ctx_adjust_freq(ctx, interval);
- if (!rotate)
- goto done;
+ if (freq) {
+ perf_ctx_adjust_freq(&cpuctx->ctx, interval);
+ if (ctx)
+ perf_ctx_adjust_freq(ctx, interval);
+ }
- cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- if (ctx)
- ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+ if (rotate) {
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+ if (ctx)
+ ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
- rotate_ctx(&cpuctx->ctx);
- if (ctx)
- rotate_ctx(ctx);
+ rotate_ctx(&cpuctx->ctx);
+ if (ctx)
+ rotate_ctx(ctx);
- perf_event_sched_in(cpuctx, ctx, current);
+ perf_event_sched_in(cpuctx, ctx, current);
+ }
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
done:
if (remove)
list_del_init(&cpuctx->rotation_list);
-
- perf_pmu_enable(cpuctx->ctx.pmu);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
void perf_event_task_tick(void)
@@ -2448,7 +2465,7 @@ static int event_enable_on_exec(struct perf_event *event,
if (event->state >= PERF_EVENT_STATE_INACTIVE)
return 0;
- __perf_event_mark_enabled(event, ctx);
+ __perf_event_mark_enabled(event);
return 1;
}
@@ -2480,13 +2497,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
- list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
- ret = event_enable_on_exec(event, ctx);
- if (ret)
- enabled = 1;
- }
-
- list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ list_for_each_entry(event, &ctx->event_list, event_entry) {
ret = event_enable_on_exec(event, ctx);
if (ret)
enabled = 1;
@@ -2574,215 +2585,6 @@ static u64 perf_event_read(struct perf_event *event)
}
/*
- * Callchain support
- */
-
-struct callchain_cpus_entries {
- struct rcu_head rcu_head;
- struct perf_callchain_entry *cpu_entries[0];
-};
-
-static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
-static atomic_t nr_callchain_events;
-static DEFINE_MUTEX(callchain_mutex);
-struct callchain_cpus_entries *callchain_cpus_entries;
-
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
-}
-
-static void release_callchain_buffers_rcu(struct rcu_head *head)
-{
- struct callchain_cpus_entries *entries;
- int cpu;
-
- entries = container_of(head, struct callchain_cpus_entries, rcu_head);
-
- for_each_possible_cpu(cpu)
- kfree(entries->cpu_entries[cpu]);
-
- kfree(entries);
-}
-
-static void release_callchain_buffers(void)
-{
- struct callchain_cpus_entries *entries;
-
- entries = callchain_cpus_entries;
- rcu_assign_pointer(callchain_cpus_entries, NULL);
- call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
-}
-
-static int alloc_callchain_buffers(void)
-{
- int cpu;
- int size;
- struct callchain_cpus_entries *entries;
-
- /*
- * We can't use the percpu allocation API for data that can be
- * accessed from NMI. Use a temporary manual per cpu allocation
- * until that gets sorted out.
- */
- size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
-
- entries = kzalloc(size, GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
-
- size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
-
- for_each_possible_cpu(cpu) {
- entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
- cpu_to_node(cpu));
- if (!entries->cpu_entries[cpu])
- goto fail;
- }
-
- rcu_assign_pointer(callchain_cpus_entries, entries);
-
- return 0;
-
-fail:
- for_each_possible_cpu(cpu)
- kfree(entries->cpu_entries[cpu]);
- kfree(entries);
-
- return -ENOMEM;
-}
-
-static int get_callchain_buffers(void)
-{
- int err = 0;
- int count;
-
- mutex_lock(&callchain_mutex);
-
- count = atomic_inc_return(&nr_callchain_events);
- if (WARN_ON_ONCE(count < 1)) {
- err = -EINVAL;
- goto exit;
- }
-
- if (count > 1) {
- /* If the allocation failed, give up */
- if (!callchain_cpus_entries)
- err = -ENOMEM;
- goto exit;
- }
-
- err = alloc_callchain_buffers();
- if (err)
- release_callchain_buffers();
-exit:
- mutex_unlock(&callchain_mutex);
-
- return err;
-}
-
-static void put_callchain_buffers(void)
-{
- if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
- release_callchain_buffers();
- mutex_unlock(&callchain_mutex);
- }
-}
-
-static int get_recursion_context(int *recursion)
-{
- int rctx;
-
- if (in_nmi())
- rctx = 3;
- else if (in_irq())
- rctx = 2;
- else if (in_softirq())
- rctx = 1;
- else
- rctx = 0;
-
- if (recursion[rctx])
- return -1;
-
- recursion[rctx]++;
- barrier();
-
- return rctx;
-}
-
-static inline void put_recursion_context(int *recursion, int rctx)
-{
- barrier();
- recursion[rctx]--;
-}
-
-static struct perf_callchain_entry *get_callchain_entry(int *rctx)
-{
- int cpu;
- struct callchain_cpus_entries *entries;
-
- *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
- if (*rctx == -1)
- return NULL;
-
- entries = rcu_dereference(callchain_cpus_entries);
- if (!entries)
- return NULL;
-
- cpu = smp_processor_id();
-
- return &entries->cpu_entries[cpu][*rctx];
-}
-
-static void
-put_callchain_entry(int rctx)
-{
- put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
-}
-
-static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
- int rctx;
- struct perf_callchain_entry *entry;
-
-
- entry = get_callchain_entry(&rctx);
- if (rctx == -1)
- return NULL;
-
- if (!entry)
- goto exit_put;
-
- entry->nr = 0;
-
- if (!user_mode(regs)) {
- perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
- perf_callchain_kernel(entry, regs);
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
-
- if (regs) {
- perf_callchain_store(entry, PERF_CONTEXT_USER);
- perf_callchain_user(entry, regs);
- }
-
-exit_put:
- put_callchain_entry(rctx);
-
- return entry;
-}
-
-/*
* Initialize the perf_event context in a task_struct:
*/
static void __perf_event_init_context(struct perf_event_context *ctx)
@@ -2946,7 +2748,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_dec(&perf_sched_events);
+ jump_label_dec_deferred(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
@@ -2957,7 +2759,7 @@ static void free_event(struct perf_event *event)
put_callchain_buffers();
if (is_cgroup_event(event)) {
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_dec(&perf_sched_events);
+ jump_label_dec_deferred(&perf_sched_events);
}
}
@@ -4820,7 +4622,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
struct hw_perf_event *hwc = &event->hw;
int throttle = 0;
- data->period = event->hw.last_period;
if (!overflow)
overflow = perf_swevent_set_period(event);
@@ -4854,6 +4655,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;
+ if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
+ data->period = nr;
+ return perf_swevent_overflow(event, 1, data, regs);
+ } else
+ data->period = event->hw.last_period;
+
if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
return perf_swevent_overflow(event, 1, data, regs);
@@ -5981,7 +5788,7 @@ done:
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_inc(&perf_sched_events);
+ jump_label_inc(&perf_sched_events.key);
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
@@ -6219,7 +6026,7 @@ SYSCALL_DEFINE5(perf_event_open,
* - that may need work on context switch
*/
atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_inc(&perf_sched_events);
+ jump_label_inc(&perf_sched_events.key);
}
/*
@@ -7065,6 +6872,9 @@ void __init perf_event_init(void)
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
+
+ /* do not patch jump label more than once per second */
+ jump_label_rate_limit(&perf_sched_events, HZ);
}
static int __init perf_event_sysfs_init(void)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 64568a69937..b0b107f90af 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -1,6 +1,10 @@
#ifndef _KERNEL_EVENTS_INTERNAL_H
#define _KERNEL_EVENTS_INTERNAL_H
+#include <linux/hardirq.h>
+
+/* Buffer handling */
+
#define RING_BUFFER_WRITABLE 0x01
struct ring_buffer {
@@ -67,7 +71,7 @@ static inline int page_order(struct ring_buffer *rb)
}
#endif
-static unsigned long perf_data_size(struct ring_buffer *rb)
+static inline unsigned long perf_data_size(struct ring_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
}
@@ -96,4 +100,37 @@ __output_copy(struct perf_output_handle *handle,
} while (len);
}
+/* Callchain handling */
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+extern int get_callchain_buffers(void);
+extern void put_callchain_buffers(void);
+
+static inline int get_recursion_context(int *recursion)
+{
+ int rctx;
+
+ if (in_nmi())
+ rctx = 3;
+ else if (in_irq())
+ rctx = 2;
+ else if (in_softirq())
+ rctx = 1;
+ else
+ rctx = 0;
+
+ if (recursion[rctx])
+ return -1;
+
+ recursion[rctx]++;
+ barrier();
+
+ return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+ barrier();
+ recursion[rctx]--;
+}
+
#endif /* _KERNEL_EVENTS_INTERNAL_H */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 66ff7109f69..30c3c770813 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -72,15 +72,46 @@ void jump_label_inc(struct jump_label_key *key)
jump_label_unlock();
}
-void jump_label_dec(struct jump_label_key *key)
+static void __jump_label_dec(struct jump_label_key *key,
+ unsigned long rate_limit, struct delayed_work *work)
{
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
return;
- jump_label_update(key, JUMP_LABEL_DISABLE);
+ if (rate_limit) {
+ atomic_inc(&key->enabled);
+ schedule_delayed_work(work, rate_limit);
+ } else
+ jump_label_update(key, JUMP_LABEL_DISABLE);
+
jump_label_unlock();
}
+static void jump_label_update_timeout(struct work_struct *work)
+{
+ struct jump_label_key_deferred *key =
+ container_of(work, struct jump_label_key_deferred, work.work);
+ __jump_label_dec(&key->key, 0, NULL);
+}
+
+void jump_label_dec(struct jump_label_key *key)
+{
+ __jump_label_dec(key, 0, NULL);
+}
+
+void jump_label_dec_deferred(struct jump_label_key_deferred *key)
+{
+ __jump_label_dec(&key->key, key->timeout, &key->work);
+}
+
+
+void jump_label_rate_limit(struct jump_label_key_deferred *key,
+ unsigned long rl)
+{
+ key->timeout = rl;
+ INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
+}
+
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
{
if (entry->code <= (unsigned long)end &&
@@ -111,7 +142,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
* running code can override this to make the non-live update case
* cheaper.
*/
-void __weak arch_jump_label_transform_static(struct jump_entry *entry,
+void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
arch_jump_label_transform(entry, type);
@@ -217,8 +248,13 @@ void jump_label_apply_nops(struct module *mod)
if (iter_start == iter_stop)
return;
- for (iter = iter_start; iter < iter_stop; iter++)
- arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
+ for (iter = iter_start; iter < iter_stop; iter++) {
+ struct jump_label_key *iterk;
+
+ iterk = (struct jump_label_key *)(unsigned long)iter->key;
+ arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
+ JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+ }
}
static int jump_label_add_module(struct module *mod)
@@ -258,8 +294,7 @@ static int jump_label_add_module(struct module *mod)
key->next = jlm;
if (jump_label_enabled(key))
- __jump_label_update(key, iter, iter_stop,
- JUMP_LABEL_ENABLE);
+ __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
}
return 0;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b2e08c932d9..24f176c9fc9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -499,36 +499,32 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
usage[i] = '\0';
}
-static int __print_lock_name(struct lock_class *class)
+static void __print_lock_name(struct lock_class *class)
{
char str[KSYM_NAME_LEN];
const char *name;
name = class->name;
- if (!name)
- name = __get_key_name(class->key, str);
-
- return printk("%s", name);
-}
-
-static void print_lock_name(struct lock_class *class)
-{
- char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
- const char *name;
-
- get_usage_chars(class, usage);
-
- name = class->name;
if (!name) {
name = __get_key_name(class->key, str);
- printk(" (%s", name);
+ printk("%s", name);
} else {
- printk(" (%s", name);
+ printk("%s", name);
if (class->name_version > 1)
printk("#%d", class->name_version);
if (class->subclass)
printk("/%d", class->subclass);
}
+}
+
+static void print_lock_name(struct lock_class *class)
+{
+ char usage[LOCK_USAGE_CHARS];
+
+ get_usage_chars(class, usage);
+
+ printk(" (");
+ __print_lock_name(class);
printk("){%s}", usage);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b1e8943fed1..683d559a0ee 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,11 +22,13 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
+#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
+#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
@@ -947,13 +949,6 @@ struct ftrace_func_probe {
struct rcu_head rcu;
};
-enum {
- FTRACE_ENABLE_CALLS = (1 << 0),
- FTRACE_DISABLE_CALLS = (1 << 1),
- FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
- FTRACE_START_FUNC_RET = (1 << 3),
- FTRACE_STOP_FUNC_RET = (1 << 4),
-};
struct ftrace_func_entry {
struct hlist_node hlist;
unsigned long ip;
@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
.filter_hash = EMPTY_HASH,
};
-static struct dyn_ftrace *ftrace_new_addrs;
-
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
struct ftrace_page *next;
+ struct dyn_ftrace *records;
int index;
- struct dyn_ftrace records[];
+ int size;
};
-#define ENTRIES_PER_PAGE \
- ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+static struct ftrace_page *ftrace_new_pgs;
+
+#define ENTRY_SIZE sizeof(struct dyn_ftrace)
+#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
/* estimate from running different kernels */
#define NR_TO_INIT 10000
@@ -1003,7 +999,10 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
-static struct dyn_ftrace *ftrace_free_records;
+static bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+ return !hash || !hash->count;
+}
static struct ftrace_func_entry *
ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
struct hlist_head *hhd;
struct hlist_node *n;
- if (!hash->count)
+ if (ftrace_hash_empty(hash))
return NULL;
if (hash->size_bits > 0)
@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
/* Empty hash? */
- if (!hash || !hash->count)
+ if (ftrace_hash_empty(hash))
return new_hash;
size = 1 << hash->size_bits;
@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
filter_hash = rcu_dereference_raw(ops->filter_hash);
notrace_hash = rcu_dereference_raw(ops->notrace_hash);
- if ((!filter_hash || !filter_hash->count ||
+ if ((ftrace_hash_empty(filter_hash) ||
ftrace_lookup_ip(filter_hash, ip)) &&
- (!notrace_hash || !notrace_hash->count ||
+ (ftrace_hash_empty(notrace_hash) ||
!ftrace_lookup_ip(notrace_hash, ip)))
ret = 1;
else
@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
} \
}
+
+static int ftrace_cmp_recs(const void *a, const void *b)
+{
+ const struct dyn_ftrace *reca = a;
+ const struct dyn_ftrace *recb = b;
+
+ if (reca->ip > recb->ip)
+ return 1;
+ if (reca->ip < recb->ip)
+ return -1;
+ return 0;
+}
+
+/**
+ * ftrace_location - return true if the ip giving is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ struct dyn_ftrace key;
+
+ key.ip = ip;
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ rec = bsearch(&key, pg->records, pg->index,
+ sizeof(struct dyn_ftrace),
+ ftrace_cmp_recs);
+ if (rec)
+ return 1;
+ }
+
+ return 0;
+}
+
static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash) {
hash = ops->filter_hash;
other_hash = ops->notrace_hash;
- if (!hash || !hash->count)
+ if (ftrace_hash_empty(hash))
all = 1;
} else {
inc = !inc;
@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
* If the notrace hash has no items,
* then there's nothing to do.
*/
- if (hash && !hash->count)
+ if (ftrace_hash_empty(hash))
return;
}
@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
match = 1;
} else {
- in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
- in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+ in_hash = !!ftrace_lookup_ip(hash, rec->ip);
+ in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
/*
*
@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash && in_hash && !in_other_hash)
match = 1;
else if (!filter_hash && in_hash &&
- (in_other_hash || !other_hash->count))
+ (in_other_hash || ftrace_hash_empty(other_hash)))
match = 1;
}
if (!match)
@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
- rec->freelist = ftrace_free_records;
- ftrace_free_records = rec;
- rec->flags |= FTRACE_FL_FREE;
-}
-
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
- struct dyn_ftrace *rec;
-
- /* First check for freed records */
- if (ftrace_free_records) {
- rec = ftrace_free_records;
-
- if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
- FTRACE_WARN_ON_ONCE(1);
- ftrace_free_records = NULL;
+ if (ftrace_pages->index == ftrace_pages->size) {
+ /* We should have allocated enough */
+ if (WARN_ON(!ftrace_pages->next))
return NULL;
- }
-
- ftrace_free_records = rec->freelist;
- memset(rec, 0, sizeof(*rec));
- return rec;
- }
-
- if (ftrace_pages->index == ENTRIES_PER_PAGE) {
- if (!ftrace_pages->next) {
- /* allocate another page */
- ftrace_pages->next =
- (void *)get_zeroed_page(GFP_KERNEL);
- if (!ftrace_pages->next)
- return NULL;
- }
ftrace_pages = ftrace_pages->next;
}
@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
return NULL;
rec->ip = ip;
- rec->newlist = ftrace_new_addrs;
- ftrace_new_addrs = rec;
return rec;
}
@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writting to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
{
switch (failed) {
case -EFAULT:
@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
return 0;
}
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
{
- unsigned long ftrace_addr;
unsigned long flag = 0UL;
- ftrace_addr = (unsigned long)FTRACE_ADDR;
-
/*
- * If we are enabling tracing:
+ * If we are updating calls:
*
* If the record has a ref count, then we need to enable it
* because someone is using it.
*
* Otherwise we make sure its disabled.
*
- * If we are disabling tracing, then disable all records that
+ * If we are disabling calls, then disable all records that
* are enabled.
*/
if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
/* If the state of this record hasn't changed, then do nothing */
if ((rec->flags & FTRACE_FL_ENABLED) == flag)
- return 0;
+ return FTRACE_UPDATE_IGNORE;
if (flag) {
- rec->flags |= FTRACE_FL_ENABLED;
+ if (update)
+ rec->flags |= FTRACE_FL_ENABLED;
+ return FTRACE_UPDATE_MAKE_CALL;
+ }
+
+ if (update)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+
+ return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+ return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+ return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
return ftrace_make_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
}
- rec->flags &= ~FTRACE_FL_ENABLED;
- return ftrace_make_nop(NULL, rec, ftrace_addr);
+ return -1; /* unknow ftrace bug */
}
-static void ftrace_replace_code(int enable)
+static void ftrace_replace_code(int update)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
return;
do_for_each_ftrace_rec(pg, rec) {
- /* Skip over free records */
- if (rec->flags & FTRACE_FL_FREE)
- continue;
-
- failed = __ftrace_replace_code(rec, enable);
+ failed = __ftrace_replace_code(rec, update);
if (failed) {
ftrace_bug(failed, rec->ip);
/* Stop processing */
@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
} while_for_each_ftrace_rec();
}
+struct ftrace_rec_iter {
+ struct ftrace_page *pg;
+ int index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+ /*
+ * We only use a single iterator.
+ * Protected by the ftrace_lock mutex.
+ */
+ static struct ftrace_rec_iter ftrace_rec_iter;
+ struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+ iter->pg = ftrace_pages_start;
+ iter->index = 0;
+
+ /* Could have empty pages */
+ while (iter->pg && !iter->pg->index)
+ iter->pg = iter->pg->next;
+
+ if (!iter->pg)
+ return NULL;
+
+ return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+ iter->index++;
+
+ if (iter->index >= iter->pg->index) {
+ iter->pg = iter->pg->next;
+ iter->index = 0;
+
+ /* Could have empty pages */
+ while (iter->pg && !iter->pg->index)
+ iter->pg = iter->pg->next;
+ }
+
+ if (!iter->pg)
+ return NULL;
+
+ return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+ return &iter->pg->records[iter->index];
+}
+
static int
ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
- /*
- * Do not call function tracer while we update the code.
- * We are in stop machine, no worrying about races.
- */
- function_trace_stop++;
-
- if (*command & FTRACE_ENABLE_CALLS)
+ if (*command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
@@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
else if (*command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
- /*
- * For archs that call ftrace_test_stop_func(), we must
- * wait till after we update all the function callers
- * before we update the callback. This keeps different
- * ops that record different functions from corrupting
- * each other.
- */
- __ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
- function_trace_stop--;
-
return 0;
}
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, the
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+ stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if it does not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+ ftrace_run_stop_machine(command);
+}
+
static void ftrace_run_update_code(int command)
{
int ret;
@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
FTRACE_WARN_ON(ret);
if (ret)
return;
+ /*
+ * Do not call function tracer while we update the code.
+ * We are in stop machine.
+ */
+ function_trace_stop++;
- stop_machine(__ftrace_modify_code, &command, NULL);
+ /*
+ * By default we use stop_machine() to modify the code.
+ * But archs can do what ever they want as long as it
+ * is safe. The stop_machine() is the safest, but also
+ * produces the most overhead.
+ */
+ arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ /*
+ * For archs that call ftrace_test_stop_func(), we must
+ * wait till after we update all the function callers
+ * before we update the callback. This keeps different
+ * ops that record different functions from corrupting
+ * each other.
+ */
+ __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+ function_trace_stop--;
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return -ENODEV;
ftrace_start_up++;
- command |= FTRACE_ENABLE_CALLS;
+ command |= FTRACE_UPDATE_CALLS;
/* ops marked global share the filter hashes */
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
if (ops != &global_ops || !global_start_up)
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
- if (!ftrace_start_up)
- command |= FTRACE_DISABLE_CALLS;
+ command |= FTRACE_UPDATE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
if (ftrace_start_up)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
}
static void ftrace_shutdown_sysctl(void)
@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
struct ftrace_hash *hash;
hash = ops->filter_hash;
- return !!(!hash || !hash->count);
+ return ftrace_hash_empty(hash);
}
static int ftrace_update_code(struct module *mod)
{
+ struct ftrace_page *pg;
struct dyn_ftrace *p;
cycle_t start, stop;
unsigned long ref = 0;
+ int i;
/*
* When adding a module, we need to check if tracers are
@@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod)
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
- while (ftrace_new_addrs) {
+ for (pg = ftrace_new_pgs; pg; pg = pg->next) {
- /* If something went wrong, bail without enabling anything */
- if (unlikely(ftrace_disabled))
- return -1;
+ for (i = 0; i < pg->index; i++) {
+ /* If something went wrong, bail without enabling anything */
+ if (unlikely(ftrace_disabled))
+ return -1;
- p = ftrace_new_addrs;
- ftrace_new_addrs = p->newlist;
- p->flags = ref;
+ p = &pg->records[i];
+ p->flags = ref;
- /*
- * Do the initial record conversion from mcount jump
- * to the NOP instructions.
- */
- if (!ftrace_code_disable(mod, p)) {
- ftrace_free_rec(p);
- /* Game over */
- break;
- }
+ /*
+ * Do the initial record conversion from mcount jump
+ * to the NOP instructions.
+ */
+ if (!ftrace_code_disable(mod, p))
+ break;
- ftrace_update_cnt++;
+ ftrace_update_cnt++;
- /*
- * If the tracing is enabled, go ahead and enable the record.
- *
- * The reason not to enable the record immediatelly is the
- * inherent check of ftrace_make_nop/ftrace_make_call for
- * correct previous instructions. Making first the NOP
- * conversion puts the module to the correct state, thus
- * passing the ftrace_make_call check.
- */
- if (ftrace_start_up && ref) {
- int failed = __ftrace_replace_code(p, 1);
- if (failed) {
- ftrace_bug(failed, p->ip);
- ftrace_free_rec(p);
+ /*
+ * If the tracing is enabled, go ahead and enable the record.
+ *
+ * The reason not to enable the record immediatelly is the
+ * inherent check of ftrace_make_nop/ftrace_make_call for
+ * correct previous instructions. Making first the NOP
+ * conversion puts the module to the correct state, thus
+ * passing the ftrace_make_call check.
+ */
+ if (ftrace_start_up && ref) {
+ int failed = __ftrace_replace_code(p, 1);
+ if (failed)
+ ftrace_bug(failed, p->ip);
}
}
}
+ ftrace_new_pgs = NULL;
+
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
return 0;
}
-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
- struct ftrace_page *pg;
+ int order;
int cnt;
- int i;
- /* allocate a few pages */
- ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
- if (!ftrace_pages_start)
- return -1;
+ if (WARN_ON(!count))
+ return -EINVAL;
+
+ order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
/*
- * Allocate a few more pages.
- *
- * TODO: have some parser search vmlinux before
- * final linking to find all calls to ftrace.
- * Then we can:
- * a) know how many pages to allocate.
- * and/or
- * b) set up the table then.
- *
- * The dynamic code is still necessary for
- * modules.
+ * We want to fill as much as possible. No more than a page
+ * may be empty.
*/
+ while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+ order--;
- pg = ftrace_pages = ftrace_pages_start;
+ again:
+ pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- cnt = num_to_init / ENTRIES_PER_PAGE;
- pr_info("ftrace: allocating %ld entries in %d pages\n",
- num_to_init, cnt + 1);
+ if (!pg->records) {
+ /* if we can't allocate this size, try something smaller */
+ if (!order)
+ return -ENOMEM;
+ order >>= 1;
+ goto again;
+ }
- for (i = 0; i < cnt; i++) {
- pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+ cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
+ pg->size = cnt;
- /* If we fail, we'll try later anyway */
- if (!pg->next)
+ if (cnt > count)
+ cnt = count;
+
+ return cnt;
+}
+
+static struct ftrace_page *
+ftrace_allocate_pages(unsigned long num_to_init)
+{
+ struct ftrace_page *start_pg;
+ struct ftrace_page *pg;
+ int order;
+ int cnt;
+
+ if (!num_to_init)
+ return 0;
+
+ start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+ if (!pg)
+ return NULL;
+
+ /*
+ * Try to allocate as much as possible in one continues
+ * location that fills in all of the space. We want to
+ * waste as little space as possible.
+ */
+ for (;;) {
+ cnt = ftrace_allocate_records(pg, num_to_init);
+ if (cnt < 0)
+ goto free_pages;
+
+ num_to_init -= cnt;
+ if (!num_to_init)
break;
+ pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
+ if (!pg->next)
+ goto free_pages;
+
pg = pg->next;
}
- return 0;
+ return start_pg;
+
+ free_pages:
+ while (start_pg) {
+ order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+ free_pages((unsigned long)pg->records, order);
+ start_pg = pg->next;
+ kfree(pg);
+ pg = start_pg;
+ }
+ pr_info("ftrace: FAILED to allocate memory for functions\n");
+ return NULL;
}
-enum {
- FTRACE_ITER_FILTER = (1 << 0),
- FTRACE_ITER_NOTRACE = (1 << 1),
- FTRACE_ITER_PRINTALL = (1 << 2),
- FTRACE_ITER_HASH = (1 << 3),
- FTRACE_ITER_ENABLED = (1 << 4),
-};
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+{
+ int cnt;
+
+ if (!num_to_init) {
+ pr_info("ftrace: No functions to be traced?\n");
+ return -1;
+ }
+
+ cnt = num_to_init / ENTRIES_PER_PAGE;
+ pr_info("ftrace: allocating %ld entries in %d pages\n",
+ num_to_init, cnt + 1);
+
+ return 0;
+}
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l;
+ if (!(iter->flags & FTRACE_ITER_DO_HASH))
+ return NULL;
+
if (iter->func_pos > *pos)
return NULL;
@@ -2023,7 +2244,7 @@ static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct ftrace_ops *ops = &global_ops;
+ struct ftrace_ops *ops = iter->ops;
struct dyn_ftrace *rec = NULL;
if (unlikely(ftrace_disabled))
@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
} else {
rec = &iter->pg->records[iter->idx++];
- if ((rec->flags & FTRACE_FL_FREE) ||
-
- ((iter->flags & FTRACE_ITER_FILTER) &&
+ if (((iter->flags & FTRACE_ITER_FILTER) &&
!(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct ftrace_ops *ops = &global_ops;
+ struct ftrace_ops *ops = iter->ops;
void *p = NULL;
loff_t l;
@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
* off, we can short cut and just print out that all
* functions are enabled.
*/
- if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
+ if (iter->flags & FTRACE_ITER_FILTER &&
+ ftrace_hash_empty(ops->filter_hash)) {
if (*pos > 0)
return t_hash_start(m, pos);
iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
break;
}
- if (!p) {
- if (iter->flags & FTRACE_ITER_FILTER)
- return t_hash_start(m, pos);
-
- return NULL;
- }
+ if (!p)
+ return t_hash_start(m, pos);
return iter;
}
@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
+ iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
iter->pg = ftrace_pages_start;
iter->flags = FTRACE_ITER_ENABLED;
+ iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
mutex_unlock(&ftrace_lock);
}
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct inode *inode, struct file *file)
{
@@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
- return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
- inode, file);
+ return ftrace_regex_open(&global_ops,
+ FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+ inode, file);
}
static int
@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
inode, file);
}
-static loff_t
+loff_t
ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
loff_t ret;
@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
goto out_unlock;
do_for_each_ftrace_rec(pg, rec) {
-
if (ftrace_match_record(rec, mod, search, search_len, type)) {
ret = enter_record(hash, rec, not);
if (ret < 0) {
@@ -2871,14 +3105,14 @@ out_unlock:
return ret;
}
-static ssize_t
+ssize_t
ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
}
-static ssize_t
+ssize_t
ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
&& ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
{
char *func;
@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
static void __init set_ftrace_early_filters(void)
{
if (ftrace_filter_buf[0])
- set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+ ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
if (ftrace_notrace_buf[0])
- set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+ ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (ftrace_graph_buf[0])
set_ftrace_early_graph(ftrace_graph_buf);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
}
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
struct ftrace_iterator *iter;
@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash, iter->hash);
if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
&& ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
}
@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
do_for_each_ftrace_rec(pg, rec) {
- if (rec->flags & FTRACE_FL_FREE)
- continue;
-
if (ftrace_match_record(rec, NULL, search, search_len, type)) {
/* if it is in the array */
exists = false;
@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
+static void ftrace_swap_recs(void *a, void *b, int size)
+{
+ struct dyn_ftrace *reca = a;
+ struct dyn_ftrace *recb = b;
+ struct dyn_ftrace t;
+
+ t = *reca;
+ *reca = *recb;
+ *recb = t;
+}
+
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
+ struct ftrace_page *pg;
+ unsigned long count;
unsigned long *p;
unsigned long addr;
unsigned long flags = 0; /* Shut up gcc */
+ int ret = -ENOMEM;
+
+ count = end - start;
+
+ if (!count)
+ return 0;
+
+ pg = ftrace_allocate_pages(count);
+ if (!pg)
+ return -ENOMEM;
mutex_lock(&ftrace_lock);
+
+ /*
+ * Core and each module needs their own pages, as
+ * modules will free them when they are removed.
+ * Force a new page to be allocated for modules.
+ */
+ if (!mod) {
+ WARN_ON(ftrace_pages || ftrace_pages_start);
+ /* First initialization */
+ ftrace_pages = ftrace_pages_start = pg;
+ } else {
+ if (!ftrace_pages)
+ goto out;
+
+ if (WARN_ON(ftrace_pages->next)) {
+ /* Hmm, we have free pages? */
+ while (ftrace_pages->next)
+ ftrace_pages = ftrace_pages->next;
+ }
+
+ ftrace_pages->next = pg;
+ ftrace_pages = pg;
+ }
+
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!addr)
continue;
- ftrace_record_ip(addr);
+ if (!ftrace_record_ip(addr))
+ break;
}
+ /* These new locations need to be initialized */
+ ftrace_new_pgs = pg;
+
+ /* Make each individual set of pages sorted by ips */
+ for (; pg; pg = pg->next)
+ sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
+ ftrace_cmp_recs, ftrace_swap_recs);
+
/*
* We only need to disable interrupts on start up
* because we are modifying code that an interrupt
@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
ftrace_update_code(mod);
if (!mod)
local_irq_restore(flags);
+ ret = 0;
+ out:
mutex_unlock(&ftrace_lock);
- return 0;
+ return ret;
}
#ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
void ftrace_release_mod(struct module *mod)
{
struct dyn_ftrace *rec;
+ struct ftrace_page **last_pg;
struct ftrace_page *pg;
+ int order;
mutex_lock(&ftrace_lock);
if (ftrace_disabled)
goto out_unlock;
- do_for_each_ftrace_rec(pg, rec) {
+ /*
+ * Each module has its own ftrace_pages, remove
+ * them from the list.
+ */
+ last_pg = &ftrace_pages_start;
+ for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+ rec = &pg->records[0];
if (within_module_core(rec->ip, mod)) {
/*
- * rec->ip is changed in ftrace_free_rec()
- * It should not between s and e if record was freed.
+ * As core pages are first, the first
+ * page should never be a module page.
*/
- FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
- ftrace_free_rec(rec);
- }
- } while_for_each_ftrace_rec();
+ if (WARN_ON(pg == ftrace_pages_start))
+ goto out_unlock;
+
+ /* Check if we are deleting the last page */
+ if (pg == ftrace_pages)
+ ftrace_pages = next_to_ftrace_page(last_pg);
+
+ *last_pg = pg->next;
+ order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+ free_pages((unsigned long)pg->records, order);
+ kfree(pg);
+ } else
+ last_pg = &pg->next;
+ }
out_unlock:
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f2bd275bb60..7392070ffc3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -338,7 +338,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
- TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
+ TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
+ TRACE_ITER_IRQ_INFO;
static int trace_stop_count;
static DEFINE_RAW_SPINLOCK(tracing_start_lock);
@@ -426,6 +427,7 @@ static const char *trace_options[] = {
"record-cmd",
"overwrite",
"disable_on_free",
+ "irq-info",
NULL
};
@@ -1843,6 +1845,33 @@ static void s_stop(struct seq_file *m, void *p)
trace_event_read_unlock();
}
+static void
+get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
+{
+ unsigned long count;
+ int cpu;
+
+ *total = 0;
+ *entries = 0;
+
+ for_each_tracing_cpu(cpu) {
+ count = ring_buffer_entries_cpu(tr->buffer, cpu);
+ /*
+ * If this buffer has skipped entries, then we hold all
+ * entries for the trace and we need to ignore the
+ * ones before the time stamp.
+ */
+ if (tr->data[cpu]->skipped_entries) {
+ count -= tr->data[cpu]->skipped_entries;
+ /* total is the same as the entries */
+ *total += count;
+ } else
+ *total += count +
+ ring_buffer_overrun_cpu(tr->buffer, cpu);
+ *entries += count;
+ }
+}
+
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
@@ -1855,12 +1884,35 @@ static void print_lat_help_header(struct seq_file *m)
seq_puts(m, "# \\ / ||||| \\ | / \n");
}
-static void print_func_help_header(struct seq_file *m)
+static void print_event_info(struct trace_array *tr, struct seq_file *m)
+{
+ unsigned long total;
+ unsigned long entries;
+
+ get_total_entries(tr, &total, &entries);
+ seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
+ entries, total, num_online_cpus());
+ seq_puts(m, "#\n");
+}
+
+static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
{
- seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
+ print_event_info(tr, m);
+ seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
seq_puts(m, "# | | | | |\n");
}
+static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
+{
+ print_event_info(tr, m);
+ seq_puts(m, "# _-----=> irqs-off\n");
+ seq_puts(m, "# / _----=> need-resched\n");
+ seq_puts(m, "# | / _---=> hardirq/softirq\n");
+ seq_puts(m, "# || / _--=> preempt-depth\n");
+ seq_puts(m, "# ||| / delay\n");
+ seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | |||| | |\n");
+}
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
@@ -1869,32 +1921,14 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
struct trace_array *tr = iter->tr;
struct trace_array_cpu *data = tr->data[tr->cpu];
struct tracer *type = current_trace;
- unsigned long entries = 0;
- unsigned long total = 0;
- unsigned long count;
+ unsigned long entries;
+ unsigned long total;
const char *name = "preemption";
- int cpu;
if (type)
name = type->name;
-
- for_each_tracing_cpu(cpu) {
- count = ring_buffer_entries_cpu(tr->buffer, cpu);
- /*
- * If this buffer has skipped entries, then we hold all
- * entries for the trace and we need to ignore the
- * ones before the time stamp.
- */
- if (tr->data[cpu]->skipped_entries) {
- count -= tr->data[cpu]->skipped_entries;
- /* total is the same as the entries */
- total += count;
- } else
- total += count +
- ring_buffer_overrun_cpu(tr->buffer, cpu);
- entries += count;
- }
+ get_total_entries(tr, &total, &entries);
seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
name, UTS_RELEASE);
@@ -2140,6 +2174,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
return print_trace_fmt(iter);
}
+void trace_latency_header(struct seq_file *m)
+{
+ struct trace_iterator *iter = m->private;
+
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return;
+
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT)
+ print_trace_header(m, iter);
+
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_lat_help_header(m);
+}
+
void trace_default_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
@@ -2155,8 +2204,12 @@ void trace_default_header(struct seq_file *m)
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_lat_help_header(m);
} else {
- if (!(trace_flags & TRACE_ITER_VERBOSE))
- print_func_help_header(m);
+ if (!(trace_flags & TRACE_ITER_VERBOSE)) {
+ if (trace_flags & TRACE_ITER_IRQ_INFO)
+ print_func_help_header_irq(iter->tr, m);
+ else
+ print_func_help_header(iter->tr, m);
+ }
}
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 092e1f8d18d..2c2657462ac 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -370,6 +370,7 @@ void trace_graph_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
+void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -654,6 +655,7 @@ enum trace_iterator_flags {
TRACE_ITER_RECORD_CMD = 0x100000,
TRACE_ITER_OVERWRITE = 0x200000,
TRACE_ITER_STOP_ON_FREE = 0x400000,
+ TRACE_ITER_IRQ_INFO = 0x800000,
};
/*
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 95dc31efd6d..24aee712745 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,6 +27,12 @@
#include "trace.h"
#include "trace_output.h"
+#define DEFAULT_SYS_FILTER_MESSAGE \
+ "### global filter ###\n" \
+ "# Use this to set filters for multiple events.\n" \
+ "# Only events with the given fields will be affected.\n" \
+ "# If no events are modified, an error message will be displayed here"
+
enum filter_op_ids
{
OP_OR,
@@ -646,7 +652,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
- trace_seq_printf(s, "none\n");
+ trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
mutex_unlock(&event_mutex);
}
@@ -1732,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
return -ENOMEM;
}
+static int create_filter_start(char *filter_str, bool set_str,
+ struct filter_parse_state **psp,
+ struct event_filter **filterp)
+{
+ struct event_filter *filter;
+ struct filter_parse_state *ps = NULL;
+ int err = 0;
+
+ WARN_ON_ONCE(*psp || *filterp);
+
+ /* allocate everything, and if any fails, free all and fail */
+ filter = __alloc_filter();
+ if (filter && set_str)
+ err = replace_filter_string(filter, filter_str);
+
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+
+ if (!filter || !ps || err) {
+ kfree(ps);
+ __free_filter(filter);
+ return -ENOMEM;
+ }
+
+ /* we're committed to creating a new filter */
+ *filterp = filter;
+ *psp = ps;
+
+ parse_init(ps, filter_ops, filter_str);
+ err = filter_parse(ps);
+ if (err && set_str)
+ append_filter_err(ps, filter);
+ return err;
+}
+
+static void create_filter_finish(struct filter_parse_state *ps)
+{
+ if (ps) {
+ filter_opstack_clear(ps);
+ postfix_clear(ps);
+ kfree(ps);
+ }
+}
+
+/**
+ * create_filter - create a filter for a ftrace_event_call
+ * @call: ftrace_event_call to create a filter for
+ * @filter_str: filter string
+ * @set_str: remember @filter_str and enable detailed error in filter
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Creates a filter for @call with @filter_str. If @set_str is %true,
+ * @filter_str is copied and recorded in the new filter.
+ *
+ * On success, returns 0 and *@filterp points to the new filter. On
+ * failure, returns -errno and *@filterp may point to %NULL or to a new
+ * filter. In the latter case, the returned filter contains error
+ * information if @set_str is %true and the caller is responsible for
+ * freeing it.
+ */
+static int create_filter(struct ftrace_event_call *call,
+ char *filter_str, bool set_str,
+ struct event_filter **filterp)
+{
+ struct event_filter *filter = NULL;
+ struct filter_parse_state *ps = NULL;
+ int err;
+
+ err = create_filter_start(filter_str, set_str, &ps, &filter);
+ if (!err) {
+ err = replace_preds(call, filter, ps, filter_str, false);
+ if (err && set_str)
+ append_filter_err(ps, filter);
+ }
+ create_filter_finish(ps);
+
+ *filterp = filter;
+ return err;
+}
+
+/**
+ * create_system_filter - create a filter for an event_subsystem
+ * @system: event_subsystem to create a filter for
+ * @filter_str: filter string
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Identical to create_filter() except that it creates a subsystem filter
+ * and always remembers @filter_str.
+ */
+static int create_system_filter(struct event_subsystem *system,
+ char *filter_str, struct event_filter **filterp)
+{
+ struct event_filter *filter = NULL;
+ struct filter_parse_state *ps = NULL;
+ int err;
+
+ err = create_filter_start(filter_str, true, &ps, &filter);
+ if (!err) {
+ err = replace_system_preds(system, ps, filter_str);
+ if (!err) {
+ /* System filters just show a default message */
+ kfree(filter->filter_string);
+ filter->filter_string = NULL;
+ } else {
+ append_filter_err(ps, filter);
+ }
+ }
+ create_filter_finish(ps);
+
+ *filterp = filter;
+ return err;
+}
+
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
- struct filter_parse_state *ps;
struct event_filter *filter;
- struct event_filter *tmp;
int err = 0;
mutex_lock(&event_mutex);
@@ -1753,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out_unlock;
}
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto out_unlock;
-
- filter = __alloc_filter();
- if (!filter) {
- kfree(ps);
- goto out_unlock;
- }
-
- replace_filter_string(filter, filter_string);
-
- parse_init(ps, filter_ops, filter_string);
- err = filter_parse(ps);
- if (err) {
- append_filter_err(ps, filter);
- goto out;
- }
+ err = create_filter(call, filter_string, true, &filter);
- err = replace_preds(call, filter, ps, filter_string, false);
- if (err) {
- filter_disable(call);
- append_filter_err(ps, filter);
- } else
- call->flags |= TRACE_EVENT_FL_FILTERED;
-out:
/*
* Always swap the call filter with the new filter
* even if there was an error. If there was an error
* in the filter, we disable the filter and show the error
* string
*/
- tmp = call->filter;
- rcu_assign_pointer(call->filter, filter);
- if (tmp) {
- /* Make sure the call is done with the filter */
- synchronize_sched();
- __free_filter(tmp);
+ if (filter) {
+ struct event_filter *tmp = call->filter;
+
+ if (!err)
+ call->flags |= TRACE_EVENT_FL_FILTERED;
+ else
+ filter_disable(call);
+
+ rcu_assign_pointer(call->filter, filter);
+
+ if (tmp) {
+ /* Make sure the call is done with the filter */
+ synchronize_sched();
+ __free_filter(tmp);
+ }
}
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
out_unlock:
mutex_unlock(&event_mutex);
@@ -1805,7 +1902,6 @@ out_unlock:
int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
- struct filter_parse_state *ps;
struct event_filter *filter;
int err = 0;
@@ -1829,38 +1925,15 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out_unlock;
}
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto out_unlock;
-
- filter = __alloc_filter();
- if (!filter)
- goto out;
-
- replace_filter_string(filter, filter_string);
- /*
- * No event actually uses the system filter
- * we can free it without synchronize_sched().
- */
- __free_filter(system->filter);
- system->filter = filter;
-
- parse_init(ps, filter_ops, filter_string);
- err = filter_parse(ps);
- if (err) {
- append_filter_err(ps, system->filter);
- goto out;
+ err = create_system_filter(system, filter_string, &filter);
+ if (filter) {
+ /*
+ * No event actually uses the system filter
+ * we can free it without synchronize_sched().
+ */
+ __free_filter(system->filter);
+ system->filter = filter;
}
-
- err = replace_system_preds(system, ps, filter_string);
- if (err)
- append_filter_err(ps, system->filter);
-
-out:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
out_unlock:
mutex_unlock(&event_mutex);
@@ -1882,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
{
int err;
struct event_filter *filter;
- struct filter_parse_state *ps;
struct ftrace_event_call *call;
mutex_lock(&event_mutex);
@@ -1897,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter)
goto out_unlock;
- filter = __alloc_filter();
- if (!filter) {
- err = PTR_ERR(filter);
- goto out_unlock;
- }
-
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto free_filter;
-
- parse_init(ps, filter_ops, filter_str);
- err = filter_parse(ps);
- if (err)
- goto free_ps;
-
- err = replace_preds(call, filter, ps, filter_str, false);
+ err = create_filter(call, filter_str, false, &filter);
if (!err)
event->filter = filter;
-
-free_ps:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
-
-free_filter:
- if (err)
+ else
__free_filter(filter);
out_unlock:
@@ -1942,43 +1991,6 @@ out_unlock:
#define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h"
-static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
- struct event_filter **pfilter)
-{
- struct event_filter *filter;
- struct filter_parse_state *ps;
- int err = -ENOMEM;
-
- filter = __alloc_filter();
- if (!filter)
- goto out;
-
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto free_filter;
-
- parse_init(ps, filter_ops, filter_str);
- err = filter_parse(ps);
- if (err)
- goto free_ps;
-
- err = replace_preds(call, filter, ps, filter_str, false);
- if (!err)
- *pfilter = filter;
-
- free_ps:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
-
- free_filter:
- if (err)
- __free_filter(filter);
-
- out:
- return err;
-}
-
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
{ \
.filter = FILTER, \
@@ -2097,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
struct test_filter_data_t *d = &test_filter_data[i];
int err;
- err = test_get_filter(d->filter, &event_ftrace_test_filter,
- &filter);
+ err = create_filter(&event_ftrace_test_filter, d->filter,
+ false, &filter);
if (err) {
printk(KERN_INFO
"Failed to get filter for '%s', err %d\n",
d->filter, err);
+ __free_filter(filter);
break;
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 20dad0d7a16..99d20e92036 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -280,9 +280,20 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
-static void irqsoff_print_header(struct seq_file *s) { }
static void irqsoff_trace_open(struct trace_iterator *iter) { }
static void irqsoff_trace_close(struct trace_iterator *iter) { }
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void irqsoff_print_header(struct seq_file *s)
+{
+ trace_default_header(s);
+}
+#else
+static void irqsoff_print_header(struct seq_file *s)
+{
+ trace_latency_header(s);
+}
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 51999309a6c..0d6ff355594 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -627,11 +627,23 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned long secs = (unsigned long)t;
char comm[TASK_COMM_LEN];
+ int ret;
trace_find_cmdline(entry->pid, comm);
- return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
- comm, entry->pid, iter->cpu, secs, usec_rem);
+ ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
+ comm, entry->pid, iter->cpu);
+ if (!ret)
+ return 0;
+
+ if (trace_flags & TRACE_ITER_IRQ_INFO) {
+ ret = trace_print_lat_fmt(s, entry);
+ if (!ret)
+ return 0;
+ }
+
+ return trace_seq_printf(s, " %5lu.%06lu: ",
+ secs, usec_rem);
}
int trace_print_lat_context(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e4a70c0c71b..ff791ea48b5 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -280,9 +280,20 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
-static void wakeup_print_header(struct seq_file *s) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void wakeup_print_header(struct seq_file *s)
+{
+ trace_default_header(s);
+}
+#else
+static void wakeup_print_header(struct seq_file *s)
+{
+ trace_latency_header(s);
+}
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 77575b386d9..d4545f49242 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,6 +13,9 @@
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
+
+#include <asm/setup.h>
+
#include "trace.h"
#define STACK_TRACE_ENTRIES 500
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
- .flags = FTRACE_OPS_FL_GLOBAL,
};
static ssize_t
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release,
};
+static int
+stack_trace_filter_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+ inode, file);
+}
+
+static const struct file_operations stack_trace_filter_fops = {
+ .open = stack_trace_filter_open,
+ .read = seq_read,
+ .write = ftrace_filter_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_regex_release,
+};
+
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
return ret;
}
+static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
+
static __init int enable_stacktrace(char *str)
{
+ if (strncmp(str, "_filter=", 8) == 0)
+ strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+
stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1;
return 1;
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
+ trace_create_file("stack_trace_filter", 0444, d_tracer,
+ NULL, &stack_trace_filter_fops);
+
+ if (stack_trace_filter_buf[0])
+ ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
+
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);