Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Kconfig | 18
-rw-r--r-- | arch/arm/common/Makefile | 2
-rw-r--r-- | arch/arm/common/bL_switcher.c | 826
-rw-r--r-- | arch/arm/common/bL_switcher_dummy_if.c | 71
-rw-r--r-- | arch/arm/common/mcpm_entry.c | 12
-rw-r--r-- | arch/arm/common/mcpm_head.S | 16
-rw-r--r-- | arch/arm/include/asm/bL_switcher.h | 77
-rw-r--r-- | arch/arm/include/asm/hardirq.h | 2
-rw-r--r-- | arch/arm/include/asm/mcpm.h | 8
-rw-r--r-- | arch/arm/include/asm/pmu.h | 27
-rw-r--r-- | arch/arm/include/asm/smp.h | 2
-rw-r--r-- | arch/arm/kernel/perf_event.c | 18
-rw-r--r-- | arch/arm/kernel/perf_event_cpu.c | 335
-rw-r--r-- | arch/arm/kernel/perf_event_v6.c | 12
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 466
-rw-r--r-- | arch/arm/kernel/perf_event_xscale.c | 20
-rw-r--r-- | arch/arm/kernel/smp.c | 21
17 files changed, 1716 insertions, 217 deletions
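For orientation before the patch itself: the diff below adds two user-visible control interfaces, a sysfs "active" attribute and a debug-only miscdevice taking "<cpu#>,<cluster#>" requests. The following minimal user-space sketch (not part of the patch) shows how they might be exercised; the /sys and /dev paths are inferred from kobject_create_and_add("bL_switcher", kernel_kobj) and the "b.L_switcher" miscdevice name in the code below, and may differ depending on the target's devtmpfs/udev naming.

/*
 * Hypothetical user-space sketch (assumed paths): drive the switcher
 * control interfaces added by this patch.
 */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd;

	/* sysfs "active" attribute: writing '1' enables, '0' disables the switcher */
	fd = open("/sys/kernel/bL_switcher/active", O_WRONLY);
	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}

	/* debug-only dummy interface, format "<cpu#>,<cluster#>" */
	fd = open("/dev/b.L_switcher", O_WRONLY);
	if (fd >= 0) {
		write(fd, "0,1", 3);	/* ask logical CPU 0 to switch to cluster 1 */
		close(fd);
	}

	return 0;
}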
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b6ab107fb63..8070e5fb391 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1640,6 +1640,24 @@ config BIG_LITTLE help This option enables support for the big.LITTLE architecture. +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 53dc0c820ff..cfb69286952 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -19,3 +19,5 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a CFLAGS_REMOVE_mcpm_entry.o = -pg +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 00000000000..be46c594f95 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,826 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. 
+ */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. 
*/ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't sleep/schedule anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* + * Swap the physical CPUs in the logical map for this logical CPU. + * This must be flushed to RAM as the resume code + * needs to access it while the caches are still disabled. + */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + sync_cache_w(&cpu_logical_map(this_cpu)); + + /* Let's do the actual CPU switch. 
*/ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct * bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. 
+ */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + spin_lock(&t->lock); + if (t->completer) { + spin_unlock(&t->lock); + return -EBUSY; + } + t->completer = completer; + t->completer_cookie = completer_cookie; + t->wanted_cluster = new_cluster_id; + spin_unlock(&t->lock); + wake_up(&t->wq); + return 0; +} + +EXPORT_SYMBOL_GPL(bL_switch_request_cb); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int bL_switcher_halve_cpus(void) +{ + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + + /* First pass to validate what we have */ + mask = 0; + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); + } + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); + return -EINVAL; + } + + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); + /* + * Let's remember the last match to create "odd" + * pairing on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. 
+ */ + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +/* Determine the logical CPU a given physical CPU is grouped on. */ +int bL_switcher_get_logical_index(u32 mpidr) +{ + int cpu; + + if (!bL_switcher_active) + return -EUNATCH; + + mpidr &= MPIDR_HWID_BITMASK; + for_each_online_cpu(cpu) { + int pairing = bL_switcher_cpu_pairing[cpu]; + if (pairing == -1) + continue; + if ((mpidr == cpu_logical_map(cpu)) || + (mpidr == cpu_logical_map(pairing))) + return cpu; + } + return -EINVAL; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr()); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + if (bL_switcher_active) { + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + spin_lock_init(&t->lock); + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warning("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to 
prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation while the switcher is active. + * We're just not ready to deal with that given the trickery involved. 
+ */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (bL_switcher_active) + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static struct notifier_block bL_switcher_hotplug_notifier = + { &bL_switcher_hotplug_callback, NULL, 0 }; + +static bool no_bL_switcher = true; +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + register_cpu_notifier(&bL_switcher_hotplug_notifier); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 00000000000..5e2dd197e72 --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '4' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? 
: len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a0..4a2b32fd53a 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __cpuc_flush_dcache_area((void *)poke, 8); + outer_clean_range(__pa(poke), __pa(poke + 2)); +} + static const struct mcpm_platform_ops *platform_ops; int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705c4b2..057e9c5a9e1 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point) * position independent way. */ adr r5, 3f - ldmia r5, {r6, r7, r8, r11} + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes add r6, r5, r6 @ r6 = mcpm_entry_vectors ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys add r8, r5, r8 @ r8 = mcpm_sync add r11, r5, r11 @ r11 = first_man_locks + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + mov r0, #MCPM_SYNC_CLUSTER_SIZE mla r8, r0, r10, r8 @ r8 = sync cluster base @@ -195,7 +202,8 @@ mcpm_entry_gated: .align 2 -3: .word mcpm_entry_vectors - . +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b .word mcpm_power_up_setup_phys - 3b .word mcpm_sync - 3b .word first_man_locks - 3b @@ -214,6 +222,10 @@ first_man_locks: ENTRY(mcpm_entry_vectors) .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_power_up_setup_phys, #object ENTRY(mcpm_power_up_setup_phys) .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 00000000000..87ebcbc8e3c --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,77 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/compiler.h> +#include <linux/types.h> + +typedef void (*bL_switch_completion_handler)(void *cookie); + +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie); +static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL); +} + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +#ifdef CONFIG_BL_SWITCHER + +int bL_switcher_register_notifier(struct notifier_block *nb); +int bL_switcher_unregister_notifier(struct notifier_block *nb); + +/* + * Use these functions to temporarily prevent enabling/disabling of + * the switcher. + * bL_switcher_get_enabled() returns true if the switcher is currently + * enabled. Each call to bL_switcher_get_enabled() must be followed + * by a call to bL_switcher_put_enabled(). These functions are not + * recursive. + */ +bool bL_switcher_get_enabled(void); +void bL_switcher_put_enabled(void); + +int bL_switcher_trace_trigger(void); +int bL_switcher_get_logical_index(u32 mpidr); + +#else +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } +#endif /* CONFIG_BL_SWITCHER */ + +#endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 3d7351c844a..fe3ea776dc3 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 7 +#define NR_IPI 8 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a..7626a7fd493 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); /* + * This sets an early poke i.e a value to be poked into some address + * from very early assembly code before the CPU is ungated. The + * address must be physical, and if 0 then nothing will happen. + */ +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val); + +/* * CPU/cluster power operations API for higher subsystems to use. 
*/ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 0cd7824ca76..a7eaad37497 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -13,7 +13,9 @@ #define __ARM_PMU_H__ #include <linux/interrupt.h> +#include <linux/percpu.h> #include <linux/perf_event.h> +#include <linux/types.h> /* * struct arm_pmu_platdata - ARM PMU platform data @@ -71,6 +73,21 @@ struct cpupmu_regs { u32 pmxevtcnt[8]; }; +struct arm_cpu_pmu { + bool valid; + bool active; + + u32 mpidr; + int irq; + + struct perf_event *hw_events[ARMPMU_MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; + struct pmu_hw_events cpu_hw_events; + struct cpupmu_regs cpu_pmu_regs; + + void *logical_state; +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; @@ -93,16 +110,24 @@ struct arm_pmu { int (*map_event)(struct perf_event *event); void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *); void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *); + void (*cpu_init)(struct arm_pmu *, struct arm_cpu_pmu *); int num_events; atomic_t active_events; struct mutex reserve_mutex; u64 max_period; struct platform_device *plat_device; - struct pmu_hw_events *(*get_hw_events)(void); + struct pmu_hw_events *(*get_hw_events)(struct arm_pmu *); + + struct list_head class_pmus_list; + struct arm_cpu_pmu __percpu *cpu_pmus; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) +#define for_each_pmu(pmu, head) list_for_each_entry(pmu, head, class_pmus_list) + +#define to_this_cpu_pmu(arm_pmu) this_cpu_ptr((arm_pmu)->cpu_pmus) + extern const struct dev_pm_ops armpmu_dev_pm_ops; int armpmu_register(struct arm_pmu *armpmu, int type); diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index c5aa088c0a8..bfab8757b9e 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -83,6 +83,8 @@ extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); extern void smp_send_all_cpu_backtrace(void); +extern int register_ipi_completion(struct completion *completion, int cpu); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index d847c622a7b..a9da343b698 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -205,7 +205,7 @@ static void armpmu_del(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -223,7 +223,7 @@ static int armpmu_add(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -467,8 +467,14 @@ static int armpmu_event_init(struct perf_event *event) static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu); + int enabled; + + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + + BUG_ON(!hw_events->used_mask); /* TEMPORARY */ + enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); if (enabled) armpmu->start(armpmu); @@ 
-477,6 +483,10 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); + + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + armpmu->stop(armpmu); } diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 0b48a38e3cf..b3ae24f6afa 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -19,26 +19,40 @@ #define pr_fmt(fmt) "CPU PMU: " fmt #include <linux/bitmap.h> +#include <linux/cpumask.h> #include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> +#include <linux/list.h> #include <linux/of.h> +#include <linux/percpu.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <asm/bL_switcher.h> #include <asm/cputype.h> #include <asm/irq_regs.h> #include <asm/pmu.h> +#include <asm/smp_plat.h> +#include <asm/topology.h> -/* Set at runtime when we know what CPU type we are. */ -static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu); +static LIST_HEAD(cpu_pmus_list); -static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); -static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +#define cpu_for_each_pmu(pmu, cpu_pmu, cpu) \ + for_each_pmu(pmu, &cpu_pmus_list) \ + if (((cpu_pmu) = per_cpu_ptr((pmu)->cpu_pmus, cpu))->valid) -static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); +static struct arm_pmu *__cpu_find_any_pmu(unsigned int cpu) +{ + struct arm_pmu *pmu; + struct arm_cpu_pmu *cpu_pmu; + + cpu_for_each_pmu(pmu, cpu_pmu, cpu) + return pmu; + + return NULL; +} /* * Despite the names, these two functions are CPU-specific and are used @@ -46,7 +60,7 @@ static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); */ const char *perf_pmu_name(void) { - struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); + struct arm_pmu *pmu = __cpu_find_any_pmu(0); if (!pmu) return NULL; @@ -56,7 +70,7 @@ EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { - struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); + struct arm_pmu *pmu = __cpu_find_any_pmu(0); if (!pmu) return 0; @@ -70,51 +84,73 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #include "perf_event_v6.c" #include "perf_event_v7.c" -static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) +static struct pmu_hw_events *cpu_pmu_get_cpu_events(struct arm_pmu *pmu) { - return &__get_cpu_var(cpu_hw_events); + return &this_cpu_ptr(pmu->cpu_pmus)->cpu_hw_events; +} + +static int find_logical_cpu(u32 mpidr) +{ + int cpu = bL_switcher_get_logical_index(mpidr); + + if (cpu != -EUNATCH) + return cpu; + + return get_logical_index(mpidr); } -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +static void cpu_pmu_free_irq(struct arm_pmu *pmu) { - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - int cpu = -1; + int i; + int cpu; + struct arm_cpu_pmu *cpu_pmu; + + for_each_possible_cpu(i) { + if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i))) + continue; + + if (cpu_pmu->mpidr == -1) + continue; - irqs = min(pmu_device->num_resources, num_possible_cpus()); + cpu = find_logical_cpu(cpu_pmu->mpidr); + if (cpu < 0) + continue; - for (i = 0; i < irqs; ++i) { - cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + if (!cpumask_test_and_clear_cpu(cpu, &pmu->active_irqs)) continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, cpu_pmu); + if 
(cpu_pmu->irq >= 0) + free_irq(cpu_pmu->irq, pmu); } } -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +static int cpu_pmu_request_irq(struct arm_pmu *pmu, irq_handler_t handler) { int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - int cpu = -1; + int cpu; + struct arm_cpu_pmu *cpu_pmu; - if (!pmu_device) - return -ENODEV; + irqs = 0; + for_each_possible_cpu(i) + if (per_cpu_ptr(pmu->cpu_pmus, i)) + ++irqs; - irqs = min(pmu_device->num_resources, num_possible_cpus()); if (irqs < 1) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); - irq = platform_get_irq(pmu_device, i); + for_each_possible_cpu(i) { + if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i))) + continue; + + irq = cpu_pmu->irq; if (irq < 0) continue; + cpu = find_logical_cpu(cpu_pmu->mpidr); + if (cpu < 0 || cpu != i) + continue; + /* * If we have a single PMU interrupt that we can't shift, * assume that we're running on a uniprocessor machine and @@ -122,41 +158,51 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) */ if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); + irq, cpu); continue; } + pr_debug("%s: requesting IRQ %d for CPU%d\n", + pmu->name, irq, cpu); + err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu", - cpu_pmu); + pmu); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); return err; } - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &pmu->active_irqs); } return 0; } -static void cpu_pmu_init(struct arm_pmu *cpu_pmu) +static void cpu_pmu_init(struct arm_pmu *pmu) { int cpu; - for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) { - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - events->events = per_cpu(hw_events, cpu); - events->used_mask = per_cpu(used_mask, cpu); + for_each_cpu_mask(cpu, pmu->valid_cpus) { + struct arm_cpu_pmu *cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, cpu); + struct pmu_hw_events *events = &cpu_pmu->cpu_hw_events; + + events->events = cpu_pmu->hw_events; + events->used_mask = cpu_pmu->used_mask; raw_spin_lock_init(&events->pmu_lock); + + if (pmu->cpu_init) + pmu->cpu_init(pmu, cpu_pmu); + + cpu_pmu->valid = true; } - cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; + pmu->get_hw_events = cpu_pmu_get_cpu_events; + pmu->request_irq = cpu_pmu_request_irq; + pmu->free_irq = cpu_pmu_free_irq; /* Ensure the PMU has sane values out of reset. 
*/ - if (cpu_pmu->reset) - on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1); + if (pmu->reset) + on_each_cpu_mask(&pmu->valid_cpus, pmu->reset, pmu, 1); } /* @@ -168,36 +214,42 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) static int __cpuinit cpu_pmu_notify(struct notifier_block *b, unsigned long action, void *hcpu) { - struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu); + struct arm_pmu *pmu; + struct arm_cpu_pmu *cpu_pmu; + int ret = NOTIFY_DONE; if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) return NOTIFY_DONE; - if (pmu && pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; + cpu_for_each_pmu(pmu, cpu_pmu, (unsigned int)hcpu) + if (pmu->reset) { + pmu->reset(pmu); + ret = NOTIFY_OK; + } - return NOTIFY_OK; + return ret; } static int cpu_pmu_pm_notify(struct notifier_block *b, unsigned long action, void *hcpu) { int cpu = smp_processor_id(); - struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu); - struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu); + struct arm_pmu *pmu; + struct arm_cpu_pmu *cpu_pmu; + int ret = NOTIFY_DONE; - if (!pmu) - return NOTIFY_DONE; + cpu_for_each_pmu(pmu, cpu_pmu, cpu) { + struct cpupmu_regs *pmuregs = &cpu_pmu->cpu_pmu_regs; - if (action == CPU_PM_ENTER && pmu->save_regs) { - pmu->save_regs(pmu, pmuregs); - } else if (action == CPU_PM_EXIT && pmu->restore_regs) { - pmu->restore_regs(pmu, pmuregs); + if (action == CPU_PM_ENTER && pmu->save_regs) + pmu->save_regs(pmu, pmuregs); + else if (action == CPU_PM_EXIT && pmu->restore_regs) + pmu->restore_regs(pmu, pmuregs); + + ret = NOTIFY_OK; } - return NOTIFY_OK; + return ret; } static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = { @@ -286,25 +338,100 @@ static int probe_current_pmu(struct arm_pmu *pmu) return ret; } +static void cpu_pmu_free(struct arm_pmu *pmu) +{ + if (!pmu) + return; + + free_percpu(pmu->cpu_pmus); + kfree(pmu); +} + +/* + * HACK: Find a b.L switcher partner for CPU cpu on the specified cluster + * This information should be obtained from an interface provided by the + * Switcher itself, if possible. 
+ */ +#ifdef CONFIG_BL_SWITCHER +static int bL_get_partner(int cpu, int cluster) +{ + unsigned int i; + + + for_each_possible_cpu(i) { + if (cpu_topology[i].thread_id == cpu_topology[cpu].thread_id && + cpu_topology[i].core_id == cpu_topology[cpu].core_id && + cpu_topology[i].socket_id == cluster) + return i; + } + + return -1; /* no partner found */ +} +#else +static int bL_get_partner(int __always_unused cpu, int __always_unused cluster) +{ + return -1; +} +#endif + +static int find_irq(struct platform_device *pdev, + struct device_node *pmu_node, + struct device_node *cluster_node, + u32 mpidr) +{ + int irq = -1; + u32 cluster; + u32 core; + struct device_node *cores_node; + struct device_node *core_node = NULL; + + if (of_property_read_u32(cluster_node, "reg", &cluster) || + cluster != MPIDR_AFFINITY_LEVEL(mpidr, 1)) + goto error; + + cores_node = of_get_child_by_name(cluster_node, "cores"); + if (!cores_node) + goto error; + + for_each_child_of_node(cores_node, core_node) + if (!of_property_read_u32(core_node, "reg", &core) && + core == MPIDR_AFFINITY_LEVEL(mpidr, 0)) + break; + + if (!core_node) + goto error; + + irq = platform_get_irq(pdev, core); + +error: + of_node_put(core_node); + of_node_put(cores_node); + return irq; +} + static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; struct device_node *node = pdev->dev.of_node; struct arm_pmu *pmu; + struct arm_cpu_pmu __percpu *cpu_pmus; int ret = 0; - int cpu; pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!"); - return -ENOMEM; - } + if (!pmu) + goto error_nomem; + + pmu->cpu_pmus = cpu_pmus = alloc_percpu(struct arm_cpu_pmu); + if (!cpu_pmus) + goto error_nomem; if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { smp_call_func_t init_fn = (smp_call_func_t)of_id->data; struct device_node *ncluster; int cluster = -1; cpumask_t sibling_mask; + cpumask_t phys_sibling_mask; + unsigned int i; ncluster = of_parse_phandle(node, "cluster", 0); if (ncluster) { @@ -315,11 +442,59 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) cluster = be32_to_cpup(hwid); } /* set sibling mask to all cpu mask if socket is not specified */ - if (cluster == -1 || + /* + * In a switcher kernel, we affine all PMUs to CPUs and + * abstract the runtime presence/absence of PMUs at a lower + * level. + */ + if (cluster == -1 || IS_ENABLED(CONFIG_BL_SWITCHER) || cluster_to_logical_mask(cluster, &sibling_mask)) - cpumask_setall(&sibling_mask); + cpumask_copy(&sibling_mask, cpu_possible_mask); - smp_call_function_any(&sibling_mask, init_fn, pmu, 1); + if (bL_switcher_get_enabled()) + /* + * The switcher initialises late now, so it should not + * have initialised yet: + */ + BUG(); + + cpumask_copy(&phys_sibling_mask, cpu_possible_mask); + + /* + * HACK: Deduce how the switcher will modify the topology + * in order to fill in PMU<->CPU combinations which don't + * make sense when the switcher is disabled. Ideally, this + * knowledge should come from the swithcer somehow. 
+ */ + for_each_possible_cpu(i) { + int cpu = i; + + per_cpu_ptr(cpu_pmus, i)->mpidr = -1; + per_cpu_ptr(cpu_pmus, i)->irq = -1; + + if (cpu_topology[i].socket_id != cluster) { + cpumask_clear_cpu(i, &phys_sibling_mask); + cpu = bL_get_partner(i, cluster); + } + + if (cpu == -1) + cpumask_clear_cpu(i, &sibling_mask); + else { + int irq = find_irq(pdev, node, ncluster, + cpu_logical_map(cpu)); + per_cpu_ptr(cpu_pmus, i)->mpidr = + cpu_logical_map(cpu); + per_cpu_ptr(cpu_pmus, i)->irq = irq; + } + } + + /* + * This relies on an MP view of the system to choose the right + * CPU to run init_fn: + */ + smp_call_function_any(&phys_sibling_mask, init_fn, pmu, 1); + + bL_switcher_put_enabled(); /* now set the valid_cpus after init */ cpumask_copy(&pmu->valid_cpus, &sibling_mask); @@ -327,24 +502,26 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) ret = probe_current_pmu(pmu); } - if (ret) { - pr_info("failed to probe PMU!"); - goto out_free; - } - - for_each_cpu_mask(cpu, pmu->valid_cpus) - per_cpu(cpu_pmu, cpu) = pmu; + if (ret) + goto error; pmu->plat_device = pdev; cpu_pmu_init(pmu); ret = armpmu_register(pmu, -1); - if (!ret) - return 0; + if (ret) + goto error; -out_free: - pr_info("failed to register PMU devices!"); - kfree(pmu); + list_add(&pmu->class_pmus_list, &cpu_pmus_list); + goto out; + +error_nomem: + pr_warn("out of memory\n"); + ret = -ENOMEM; +error: + pr_warn("failed to register PMU device(s)!\n"); + cpu_pmu_free(pmu); +out: return ret; } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 03664b0e8fa..a191bdb9ebd 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -439,7 +439,7 @@ static void armv6pmu_enable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { @@ -477,7 +477,7 @@ armv6pmu_handle_irq(int irq_num, unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu); struct pt_regs *regs; int idx; @@ -533,7 +533,7 @@ armv6pmu_handle_irq(int irq_num, static void armv6pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = armv6_pmcr_read(); @@ -545,7 +545,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu) static void armv6pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = armv6_pmcr_read(); @@ -586,7 +586,7 @@ static void armv6pmu_disable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { @@ -621,7 +621,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event) 
unsigned long val, mask, flags, evt = 0; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 654db5030c3..25762a548f2 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -18,6 +18,175 @@ #ifdef CONFIG_CPU_V7 +struct armv7_pmu_logical_state { + u32 PMCR; + u32 PMCNTENSET; + u32 PMCNTENCLR; + u32 PMOVSR; + u32 PMSWINC; + u32 PMSELR; + u32 PMCEID0; + u32 PMCEID1; + + u32 PMCCNTR; + + u32 PMUSERENR; + u32 PMINTENSET; + u32 PMINTENCLR; + u32 PMOVSSET; + + struct armv7_pmu_logical_cntr_state { + u32 PMXEVTYPER; + u32 PMXEVCNTR; + } cntrs[1]; /* we will grow this during allocation */ +}; + +#define __v7_logical_state(cpupmu) \ + ((struct armv7_pmu_logical_state *)(cpupmu)->logical_state) + +#define __v7_logical_state_single(cpupmu, name) \ + __v7_logical_state(cpupmu)->name +#define __v7_logical_state_cntr(cpupmu, name) \ + __v7_logical_state(cpupmu)->cntrs[__v7_logical_state(cpupmu)->PMSELR].name + +#define __def_v7_pmu_reg_W(kind, name, op1, Cm, op2) \ + static inline u32 __v7_pmu_write_physical_##name(u32 value) \ + { \ + asm volatile ( \ + "mcr p15, " #op1 ", %0, c9, " #Cm ", " #op2 \ + :: "r" (value) \ + ); \ + \ + return value; \ + } \ + \ + static inline u32 __v7_pmu_write_logical_##name( \ + struct arm_cpu_pmu *cpupmu, u32 value) \ + { \ + __v7_logical_state_##kind(cpupmu, name) = value; \ + return value; \ + } + +#define __def_v7_pmu_reg_R(kind, name, op1, Cm, op2) \ + static inline u32 __v7_pmu_read_physical_##name(void) \ + { \ + u32 result; \ + \ + asm volatile ( \ + "mrc p15, " #op1 ", %0, c9, " #Cm ", " #op2 \ + : "=r" (result) \ + ); \ + \ + return result; \ + } \ + \ + static inline u32 __v7_pmu_read_logical_##name( \ + struct arm_cpu_pmu *cpupmu) \ + { \ + return __v7_logical_state_##kind(cpupmu, name); \ + } + +#define __def_v7_pmu_reg_WO(name, op1, Cm, op2) \ + __def_v7_pmu_reg_W(single, name, op1, Cm, op2) +#define __def_v7_pmu_reg_RO(name, op1, Cm, op2) \ + __def_v7_pmu_reg_R(single, name, op1, Cm, op2) + +#define __def_v7_pmu_reg_RW(name, op1, Cm, op2) \ + __def_v7_pmu_reg_WO(name, op1, Cm, op2) \ + __def_v7_pmu_reg_RO(name, op1, Cm, op2) + +#define __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \ + __def_v7_pmu_reg_W(cntr, name, op1, Cm, op2) +#define __def_v7_pmu_cntr_RO(name, op1, Cm, op2) \ + __def_v7_pmu_reg_R(cntr, name, op1, Cm, op2) + +#define __def_v7_pmu_cntr_RW(name, op1, Cm, op2) \ + __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \ + __def_v7_pmu_cntr_RO(name, op1, Cm, op2) + +#define __def_v7_pmu_reg(name, prot, op1, Cm, op2) \ + __def_v7_pmu_reg_##prot(name, op1, Cm, op2) +#define __def_v7_pmu_cntr(name, prot, op1, Cm, op2) \ + __def_v7_pmu_cntr_##prot(name, op1, Cm, op2) + +__def_v7_pmu_reg(PMCR, RW, 0, c12, 0) +__def_v7_pmu_reg(PMCNTENSET, RW, 0, c12, 1) +__def_v7_pmu_reg(PMCNTENCLR, RW, 0, c12, 2) +__def_v7_pmu_reg(PMOVSR, RW, 0, c12, 3) +__def_v7_pmu_reg(PMSWINC, WO, 0, c12, 4) +__def_v7_pmu_reg(PMSELR, RW, 0, c12, 5) +__def_v7_pmu_reg(PMCEID0, RO, 0, c12, 6) +__def_v7_pmu_reg(PMCEID1, RO, 0, c12, 7) + +__def_v7_pmu_reg(PMCCNTR, RW, 0, c13, 0) +__def_v7_pmu_cntr(PMXEVTYPER, RW, 0, c13, 1) +__def_v7_pmu_cntr(PMXEVCNTR, RW, 0, c13, 2) + +__def_v7_pmu_reg(PMUSERENR, RW, 0, c14, 0) +__def_v7_pmu_reg(PMINTENSET, RW, 0, c14, 1) 
+__def_v7_pmu_reg(PMINTENCLR, RW, 0, c14, 2) +__def_v7_pmu_reg(PMOVSSET, RW, 0, c14, 3) + +#define __v7_pmu_write_physical(name, value) \ + __v7_pmu_write_physical_##name(value) +#define __v7_pmu_read_physical(name) \ + __v7_pmu_read_physical_##name() + +#define __v7_pmu_write_logical(cpupmu, name, value) \ + __v7_pmu_write_logical_##name(cpupmu, value) +#define __v7_pmu_read_logical(cpupmu, name) \ + __v7_pmu_read_logical_##name(cpupmu) + +#define __v7_pmu_write_reg(cpupmu, name, value) do { \ + if ((cpupmu)->active) \ + __v7_pmu_write_physical(name, value); \ + else \ + __v7_pmu_write_logical(cpupmu, name, value); \ +} while(0) + +#define __v7_pmu_read_reg(cpupmu, name) ( \ + (cpupmu)->active ? \ + __v7_pmu_read_physical(name) : \ + __v7_pmu_read_logical(cpupmu, name) \ +) + +#define __v7_pmu_reg_set(cpupmu, name, logical_name, mask) do { \ + if ((cpupmu)->active) \ + __v7_pmu_write_physical(name, mask); \ + else { \ + u32 __value; \ + __value =__v7_pmu_read_logical(cpupmu, logical_name) | (mask); \ + __v7_pmu_write_logical(cpupmu, logical_name, __value); \ + } \ +} while(0) + +#define __v7_pmu_reg_clr(cpupmu, name, logical_name, mask) do { \ + if ((cpupmu)->active) \ + __v7_pmu_write_physical(name, mask); \ + else { \ + u32 __value; \ + __value = __v7_pmu_read_logical(cpupmu, logical_name) & ~(mask); \ + __v7_pmu_write_logical(cpupmu, logical_name, __value); \ + } \ +} while(0) + +#define __v7_pmu_save_reg(cpupmu, name) \ + __v7_pmu_write_logical(cpupmu, name, \ + __v7_pmu_read_physical(name)) +#define __v7_pmu_restore_reg(cpupmu, name) \ + __v7_pmu_write_physical(name, \ + __v7_pmu_read_logical(cpupmu, name)) +static u32 read_mpidr(void) +{ + u32 result; + + asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (result)); + + return result; +} + +static void armv7pmu_reset(void *info); + /* * Common ARMv7 event types * @@ -784,18 +953,16 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define ARMV7_EXCLUDE_USER (1 << 30) #define ARMV7_INCLUDE_HYP (1 << 27) -static inline u32 armv7_pmnc_read(void) +static inline u32 armv7_pmnc_read(struct arm_cpu_pmu *cpupmu) { - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; + return __v7_pmu_read_reg(cpupmu, PMCR); } -static inline void armv7_pmnc_write(u32 val) +static inline void armv7_pmnc_write(struct arm_cpu_pmu *cpupmu, u32 val) { val &= ARMV7_PMNC_MASK; isb(); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); + __v7_pmu_write_reg(cpupmu, PMCR, val); } static inline int armv7_pmnc_has_overflowed(u32 pmnc) @@ -814,10 +981,10 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); } -static inline int armv7_pmnc_select_counter(int idx) +static inline int armv7_pmnc_select_counter(struct arm_cpu_pmu *cpupmu, int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); + __v7_pmu_write_reg(cpupmu, PMSELR, counter); isb(); return idx; @@ -825,185 +992,197 @@ static inline int armv7_pmnc_select_counter(int idx) static inline u32 armv7pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct arm_pmu *pmu = to_arm_pmu(event->pmu); + struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u32 value = 0; - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) + if (!armv7_pmnc_counter_valid(pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx 
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if (armv7_pmnc_select_counter(idx) == idx)
-		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+		value = __v7_pmu_read_reg(cpupmu, PMCCNTR);
+	else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+		value = __v7_pmu_read_reg(cpupmu, PMXEVCNTR);
 
 	return value;
 }
 
 static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
 {
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+	if (!armv7_pmnc_counter_valid(pmu, idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if (armv7_pmnc_select_counter(idx) == idx)
-		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+		__v7_pmu_write_reg(cpupmu, PMCCNTR, value);
+	else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+		__v7_pmu_write_reg(cpupmu, PMXEVCNTR, value);
 }
 
-static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(struct arm_cpu_pmu *cpupmu, int idx, u32 val)
 {
-	if (armv7_pmnc_select_counter(idx) == idx) {
+	if (armv7_pmnc_select_counter(cpupmu, idx) == idx) {
 		val &= ARMV7_EVTYPE_MASK;
-		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+		__v7_pmu_write_reg(cpupmu, PMXEVTYPER, val);
 	}
 }
 
-static inline int armv7_pmnc_enable_counter(int idx)
+static inline int armv7_pmnc_enable_counter(struct arm_cpu_pmu *cpupmu, int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
+	__v7_pmu_reg_set(cpupmu, PMCNTENSET, PMCNTENSET, BIT(counter));
 	return idx;
 }
 
-static inline int armv7_pmnc_disable_counter(int idx)
+static inline int armv7_pmnc_disable_counter(struct arm_cpu_pmu *cpupmu, int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
+	__v7_pmu_reg_clr(cpupmu, PMCNTENCLR, PMCNTENSET, BIT(counter));
 	return idx;
 }
 
-static inline int armv7_pmnc_enable_intens(int idx)
+static inline int armv7_pmnc_enable_intens(struct arm_cpu_pmu *cpupmu, int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
+	__v7_pmu_reg_set(cpupmu, PMINTENSET, PMINTENSET, BIT(counter));
 	return idx;
 }
 
-static inline int armv7_pmnc_disable_intens(int idx)
+static inline int armv7_pmnc_disable_intens(struct arm_cpu_pmu *cpupmu, int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+	__v7_pmu_reg_clr(cpupmu, PMINTENCLR, PMINTENSET, BIT(counter));
 	isb();
 	/* Clear the overflow flag in case an interrupt is pending. */
*/ - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); + __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, BIT(counter)); isb(); return idx; } -static inline u32 armv7_pmnc_getreset_flags(void) +static inline u32 armv7_pmnc_getreset_flags(struct arm_cpu_pmu *cpupmu) { u32 val; /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + val = __v7_pmu_read_reg(cpupmu, PMOVSR); /* Write to clear flags */ val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, val); return val; } #ifdef DEBUG -static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) +static void armv7_pmnc_dump_regs(struct arm_pmu *pmu) { u32 val; unsigned int cnt; + struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu); printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); + printk(KERN_INFO "PMNC =0x%08x\n", __v7_pmu_read_reg(PMCR)); + printk(KERN_INFO "CNTENS=0x%08x\n", __v7_pmu_read_reg(PMCNTENSET)); + printk(KERN_INFO "INTENS=0x%08x\n", __v7_pmu_read_reg(PMINTENSET)); + printk(KERN_INFO "FLAGS =0x%08x\n", __v7_pmu_read_reg(PMOVSR)); + printk(KERN_INFO "SELECT=0x%08x\n", __v7_pmu_read_reg(PMSELR)); + printk(KERN_INFO "CCNT =0x%08x\n", __v7_pmu_read_reg(PMCCNTR)); for (cnt = ARMV7_IDX_COUNTER0; - cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) { + armv7_pmnc_select_counter(cpupmu, cnt); printk(KERN_INFO "CNT[%d] count =0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + ARMV7_IDX_TO_COUNTER(cnt), + __v7_pmu_read_reg(cpupmu, PMXEVCNTR)); printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); + ARMV7_IDX_TO_COUNTER(cnt), + __v7_pmu_read_reg(cpupmu, PMXEVTYPER)); } } #endif -static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu, +static void armv7pmu_save_regs(struct arm_pmu *pmu, struct cpupmu_regs *regs) { unsigned int cnt; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc)); - if (!(regs->pmc & ARMV7_PMNC_E)) + struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu); + + if (!cpupmu->active) return; - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset)); - asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren)); - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset)); - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0])); + if (!*cpupmu->cpu_hw_events.used_mask) + return; + + if (!__v7_pmu_save_reg(cpupmu, PMCR) & ARMV7_PMNC_E) + return; + + __v7_pmu_save_reg(cpupmu, PMCNTENSET); + __v7_pmu_save_reg(cpupmu, PMUSERENR); + __v7_pmu_save_reg(cpupmu, PMINTENSET); + __v7_pmu_save_reg(cpupmu, PMCCNTR); + for (cnt = ARMV7_IDX_COUNTER0; - cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, 
c13, 1" - : "=r"(regs->pmxevttype[cnt])); - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r"(regs->pmxevtcnt[cnt])); + cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) { + armv7_pmnc_select_counter(cpupmu, cnt); + __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */ + __v7_pmu_save_reg(cpupmu, PMXEVTYPER); + __v7_pmu_save_reg(cpupmu, PMXEVCNTR); } return; } -static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu, +/* armv7pmu_reset() must be called before calling this funtion */ +static void armv7pmu_restore_regs(struct arm_pmu *pmu, struct cpupmu_regs *regs) { unsigned int cnt; - if (!(regs->pmc & ARMV7_PMNC_E)) + u32 pmcr; + struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu); + + armv7pmu_reset(pmu); + + if (!cpupmu->active) return; - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset)); - asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren)); - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset)); - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0])); + if (!*cpupmu->cpu_hw_events.used_mask) + return; + + pmcr = __v7_pmu_read_logical(cpupmu, PMCR); + if (!pmcr & ARMV7_PMNC_E) + return; + + __v7_pmu_restore_reg(cpupmu, PMCNTENSET); + __v7_pmu_restore_reg(cpupmu, PMUSERENR); + __v7_pmu_restore_reg(cpupmu, PMINTENSET); + __v7_pmu_restore_reg(cpupmu, PMCCNTR); + for (cnt = ARMV7_IDX_COUNTER0; - cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mcr p15, 0, %0, c9, c13, 1" - : : "r"(regs->pmxevttype[cnt])); - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r"(regs->pmxevtcnt[cnt])); + cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) { + armv7_pmnc_select_counter(cpupmu, cnt); + __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */ + __v7_pmu_restore_reg(cpupmu, PMXEVTYPER); + __v7_pmu_restore_reg(cpupmu, PMXEVCNTR); } - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc)); + __v7_pmu_write_reg(cpupmu, PMCR, pmcr); } static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct arm_pmu *pmu = to_arm_pmu(event->pmu); + struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu); + struct pmu_hw_events *events = pmu->get_hw_events(pmu); int idx = hwc->idx; - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + if (!armv7_pmnc_counter_valid(pmu, idx)) { pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", smp_processor_id(), idx); return; @@ -1018,25 +1197,25 @@ static void armv7pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_counter(cpupmu, idx); /* * Set event (if destined for PMNx counters) * We only need to set the event for the cycle counter if we * have the ability to perform event filtering. 
 	 */
-	if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
-		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+	if (pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(cpupmu, idx, hwc->config_base);
 
 	/*
 	 * Enable interrupt for this counter
 	 */
-	armv7_pmnc_enable_intens(idx);
+	armv7_pmnc_enable_intens(cpupmu, idx);
 
 	/*
 	 * Enable counter
 	 */
-	armv7_pmnc_enable_counter(idx);
+	armv7_pmnc_enable_counter(cpupmu, idx);
 
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
@@ -1045,11 +1224,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
 {
 	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+	struct pmu_hw_events *events = pmu->get_hw_events(pmu);
 	int idx = hwc->idx;
 
-	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+	if (!armv7_pmnc_counter_valid(pmu, idx)) {
 		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
 			smp_processor_id(), idx);
 		return;
@@ -1063,12 +1243,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	/*
 	 * Disable counter
 	 */
-	armv7_pmnc_disable_counter(idx);
+	armv7_pmnc_disable_counter(cpupmu, idx);
 
 	/*
 	 * Disable interrupt for this counter
 	 */
-	armv7_pmnc_disable_intens(idx);
+	armv7_pmnc_disable_intens(cpupmu, idx);
 
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
@@ -1077,15 +1257,23 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
 	u32 pmnc;
 	struct perf_sample_data data;
-	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct arm_pmu *pmu = (struct arm_pmu *)dev;
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+	struct pmu_hw_events *cpuc = pmu->get_hw_events(pmu);
 	struct pt_regs *regs;
 	int idx;
 
+	if (!cpupmu->active) {
+		pr_warn_ratelimited("%s: Spurious interrupt for inactive PMU %s: event counts will be wrong.\n",
+			__func__, pmu->name);
+		pr_warn_once("This is a known interrupt affinity bug in the b.L switcher perf support.\n");
+		return IRQ_NONE;
+	}
+
 	/*
 	 * Get and reset the IRQ flags
 	 */
-	pmnc = armv7_pmnc_getreset_flags();
+	pmnc = armv7_pmnc_getreset_flags(cpupmu);
 
 	/*
 	 * Did an overflow occur?
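The hunks above all follow the same conversion: each armv7_pmnc_* helper now takes the per-CPU arm_cpu_pmu and goes through the __v7_pmu_read_reg()/__v7_pmu_write_reg() wrappers, which only touch the coprocessor while cpupmu->active is set and otherwise operate on the in-memory logical_state shadow. A minimal sketch of that dispatch, with simplified, hypothetical types (not code from this patch), looks like this:

	/* Illustrative sketch only -- simplified stand-in types. */
	struct fake_cpu_pmu {
		int active;		/* this logical CPU currently owns the hardware PMU */
		u32 logical_pmovsr;	/* shadow of PMOVSR kept while inactive */
	};

	static u32 fake_read_pmovsr(struct fake_cpu_pmu *cpupmu)
	{
		u32 val;

		if (cpupmu->active) {
			/* same MRC as the physical PMOVSR accessor above */
			asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
			return val;
		}
		return cpupmu->logical_pmovsr;	/* logical copy survives a bL switch */
	}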
@@ -1098,7 +1286,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	 */
 	regs = get_irq_regs();
 
-	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+	for (idx = 0; idx < pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
@@ -1120,7 +1308,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(event);
+			pmu->disable(event);
 	}
 
 	/*
@@ -1135,25 +1323,27 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+static void armv7pmu_start(struct arm_pmu *pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+	struct pmu_hw_events *events = pmu->get_hw_events(pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
-	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+	armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) | ARMV7_PMNC_E);
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+static void armv7pmu_stop(struct arm_pmu *pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+	struct pmu_hw_events *events = pmu->get_hw_events(pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
-	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+	armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) & ~ARMV7_PMNC_E);
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
@@ -1212,19 +1402,33 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
+static bool check_active(struct arm_cpu_pmu *cpupmu)
+{
+	u32 mpidr = read_mpidr();
+
+	BUG_ON(!(mpidr & 0x80000000)); /* this won't work on uniprocessor */
+
+	cpupmu->active = ((mpidr ^ cpupmu->mpidr) & 0xFFFFFF) == 0;
+	return cpupmu->active;
+}
+
 static void armv7pmu_reset(void *info)
 {
-	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
-	u32 idx, nb_cnt = cpu_pmu->num_events;
+	struct arm_pmu *pmu = (struct arm_pmu *)info;
+	struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+	u32 idx, nb_cnt = pmu->num_events;
+
+	if (!check_active(cpupmu))
+		return;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
 	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
-		armv7_pmnc_disable_counter(idx);
-		armv7_pmnc_disable_intens(idx);
+		armv7_pmnc_disable_counter(cpupmu, idx);
+		armv7_pmnc_disable_intens(cpupmu, idx);
 	}
 
 	/* Initialize & Reset PMNC: C and P bits */
-	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+	armv7_pmnc_write(cpupmu, ARMV7_PMNC_P | ARMV7_PMNC_C);
 }
 
 static int armv7_a8_map_event(struct perf_event *event)
@@ -1257,8 +1461,13 @@ static int armv7_a7_map_event(struct perf_event *event)
 				&armv7_a7_perf_cache_map, 0xFF);
 }
 
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+			      struct arm_cpu_pmu *cpupmu);
+
 static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 {
+	struct arm_cpu_pmu *cpu_pmus = cpu_pmu->cpu_pmus;
+
 	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
 	cpu_pmu->enable		= armv7pmu_enable_event;
 	cpu_pmu->disable	= armv7pmu_disable_event;
@@ -1270,7 +1479,10 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->reset		= armv7pmu_reset;
 	cpu_pmu->save_regs	= armv7pmu_save_regs;
 	cpu_pmu->restore_regs	= armv7pmu_restore_regs;
+	cpu_pmu->cpu_init	= armv7pmu_cpu_init;
 	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	cpu_pmu->cpu_pmus	= cpu_pmus;
 };
 
 static u32 armv7_read_num_pmnc_events(void)
@@ -1278,12 +1490,38 @@ static u32 armv7_read_num_pmnc_events(void)
 	u32 nb_cnt;
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+	nb_cnt = (__v7_pmu_read_physical(PMCR) >> ARMV7_PMNC_N_SHIFT);
+	nb_cnt &= ARMV7_PMNC_N_MASK;
 
 	/* Add the CPU cycles counter and return */
 	return nb_cnt + 1;
 }
 
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+			      struct arm_cpu_pmu *cpupmu)
+{
+	size_t size = offsetof(struct armv7_pmu_logical_state, cntrs) +
+		pmu->num_events * sizeof(*__v7_logical_state(cpupmu));
+
+	cpupmu->logical_state = kzalloc(size, GFP_KERNEL);
+
+	/*
+	 * We need a proper error return mechanism for these init functions.
+	 * Until then, panicking the kernel is acceptable, since a failure
+	 * here is indicative of crippling memory constraints which will
+	 * likely make the system unusable anyway:
+	 */
+	BUG_ON(!cpupmu->logical_state);
+
+	/*
+	 * Save the "read-only" ID registers in logical_state.
+	 * Because they are read-only, there are no direct accessors,
+	 * so poke them directly into the logical_state structure:
+	 */
+	__v7_logical_state(cpupmu)->PMCEID0 = __v7_pmu_read_physical(PMCEID0);
+	__v7_logical_state(cpupmu)->PMCEID1 = __v7_pmu_read_physical(PMCEID1);
+}
+
 static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 63990c42fac..cd670eafbb5 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -225,7 +225,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	unsigned long pmnc;
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
 	struct pt_regs *regs;
 	int idx;
 
@@ -285,7 +285,7 @@ static void xscale1pmu_enable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -321,7 +321,7 @@ static void xscale1pmu_disable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -374,7 +374,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
@@ -386,7 +386,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
@@ -572,7 +572,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
 	struct pt_regs *regs;
 	int idx;
 
@@ -626,7 +626,7 @@ static void xscale2pmu_enable_event(struct perf_event *event)
 	unsigned long flags, ien, evtsel;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -672,7 +672,7 @@ static void xscale2pmu_disable_event(struct perf_event *event)
 	unsigned long flags, ien, evtsel, of_flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -738,7 +738,7 @@ out:
 static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
@@ -750,7 +750,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 15c70f0202f..70f35a48c8a 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -67,6 +67,7 @@ enum ipi_msg_type {
 	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
 	IPI_CPU_BACKTRACE,
+	IPI_COMPLETION,
 };
 
 static DECLARE_COMPLETION(cpu_running);
@@ -465,6 +466,7 @@ static const char *ipi_types[NR_IPI] = {
 	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
 	S(IPI_CPU_BACKTRACE, "CPU backtrace"),
+	S(IPI_COMPLETION, "completion interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -642,6 +644,19 @@ static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs)
 	}
 }
 
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+	per_cpu(cpu_completion, cpu) = completion;
+	return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+	complete(per_cpu(cpu_completion, cpu));
+}
+
 /*
  * Main handler for inter-processor interrupts
  */
@@ -696,6 +711,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		ipi_cpu_backtrace(cpu, regs);
 		break;
 
+	case IPI_COMPLETION:
+		irq_enter();
+		ipi_complete(cpu);
+		irq_exit();
+		break;
+
 	default:
 		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
 		       cpu, ipinr);
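The smp.c hunks add a minimal completion-over-IPI primitive: a waiter registers a struct completion for a given CPU, another CPU raises IPI_COMPLETION at that CPU, and handle_IPI() completes it. A hedged usage sketch follows; the caller, its name, and the way the IPI is raised are assumptions for illustration only and are not taken from this patch:

	#include <linux/completion.h>
	#include <linux/smp.h>

	/*
	 * Illustrative only: wait on this CPU until another CPU pokes us.
	 * Assumes the caller is pinned to the current CPU, as the switcher is.
	 */
	static void example_wait_for_signal(unsigned int signalling_cpu)
	{
		struct completion done;
		int ipinr;

		init_completion(&done);
		/* arm the per-cpu completion slot for *this* CPU */
		ipinr = register_ipi_completion(&done, smp_processor_id());

		/*
		 * The signalling CPU would raise the returned IPI number at this
		 * CPU (e.g. through the GIC SGI interface); handle_IPI() then
		 * runs ipi_complete() and wakes the waiter.
		 */
		(void)signalling_cpu;

		wait_for_completion(&done);
	}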