Diffstat (limited to 'arch/arm/kernel/perf_event_cpu.c')
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c  335
1 file changed, 256 insertions(+), 79 deletions(-)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 0b48a38e3cf..b3ae24f6afa 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,26 +19,40 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/of.h>
+#include <linux/percpu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <asm/bL_switcher.h>
#include <asm/cputype.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
-/* Set at runtime when we know what CPU type we are. */
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
+static LIST_HEAD(cpu_pmus_list);
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+#define cpu_for_each_pmu(pmu, cpu_pmu, cpu) \
+ for_each_pmu(pmu, &cpu_pmus_list) \
+ if (((cpu_pmu) = per_cpu_ptr((pmu)->cpu_pmus, cpu))->valid)
-static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+static struct arm_pmu *__cpu_find_any_pmu(unsigned int cpu)
+{
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu)
+ return pmu;
+
+ return NULL;
+}
/*
* Despite the names, these two functions are CPU-specific and are used
@@ -46,7 +60,7 @@ static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
*/
const char *perf_pmu_name(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return NULL;
@@ -56,7 +70,7 @@ EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return 0;
@@ -70,51 +84,73 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
#include "perf_event_v6.c"
#include "perf_event_v7.c"
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(struct arm_pmu *pmu)
{
- return &__get_cpu_var(cpu_hw_events);
+ return &this_cpu_ptr(pmu->cpu_pmus)->cpu_hw_events;
+}
+
+static int find_logical_cpu(u32 mpidr)
+{
+ int cpu = bL_switcher_get_logical_index(mpidr);
+
+ if (cpu != -EUNATCH)
+ return cpu;
+
+ return get_logical_index(mpidr);
}
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_free_irq(struct arm_pmu *pmu)
{
- int i, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int i;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ if (cpu_pmu->mpidr == -1)
+ continue;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0)
+ continue;
- for (i = 0; i < irqs; ++i) {
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+ if (!cpumask_test_and_clear_cpu(cpu, &pmu->active_irqs))
continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
- free_irq(irq, cpu_pmu);
+ if (cpu_pmu->irq >= 0)
+ free_irq(cpu_pmu->irq, pmu);
}
}
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+static int cpu_pmu_request_irq(struct arm_pmu *pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
- if (!pmu_device)
- return -ENODEV;
+ irqs = 0;
+ for_each_possible_cpu(i)
+ if (per_cpu_ptr(pmu->cpu_pmus, i))
+ ++irqs;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
if (irqs < 1) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
- for (i = 0; i < irqs; ++i) {
- err = 0;
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- irq = platform_get_irq(pmu_device, i);
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ irq = cpu_pmu->irq;
if (irq < 0)
continue;
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0 || cpu != i)
+ continue;
+
/*
* If we have a single PMU interrupt that we can't shift,
* assume that we're running on a uniprocessor machine and
@@ -122,41 +158,51 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
*/
if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
+ irq, cpu);
continue;
}
+ pr_debug("%s: requesting IRQ %d for CPU%d\n",
+ pmu->name, irq, cpu);
+
err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
- cpu_pmu);
+ pmu);
if (err) {
pr_err("unable to request IRQ%d for ARM PMU counters\n",
irq);
return err;
}
- cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &pmu->active_irqs);
}
return 0;
}
-static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_init(struct arm_pmu *pmu)
{
int cpu;
- for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
- struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
- events->events = per_cpu(hw_events, cpu);
- events->used_mask = per_cpu(used_mask, cpu);
+ for_each_cpu_mask(cpu, pmu->valid_cpus) {
+ struct arm_cpu_pmu *cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, cpu);
+ struct pmu_hw_events *events = &cpu_pmu->cpu_hw_events;
+
+ events->events = cpu_pmu->hw_events;
+ events->used_mask = cpu_pmu->used_mask;
raw_spin_lock_init(&events->pmu_lock);
+
+ if (pmu->cpu_init)
+ pmu->cpu_init(pmu, cpu_pmu);
+
+ cpu_pmu->valid = true;
}
- cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
- cpu_pmu->request_irq = cpu_pmu_request_irq;
- cpu_pmu->free_irq = cpu_pmu_free_irq;
+ pmu->get_hw_events = cpu_pmu_get_cpu_events;
+ pmu->request_irq = cpu_pmu_request_irq;
+ pmu->free_irq = cpu_pmu_free_irq;
/* Ensure the PMU has sane values out of reset. */
- if (cpu_pmu->reset)
- on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
+ if (pmu->reset)
+ on_each_cpu_mask(&pmu->valid_cpus, pmu->reset, pmu, 1);
}
/*
@@ -168,36 +214,42 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (pmu && pmu->reset)
- pmu->reset(pmu);
- else
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, (unsigned int)hcpu)
+ if (pmu->reset) {
+ pmu->reset(pmu);
+ ret = NOTIFY_OK;
+ }
- return NOTIFY_OK;
+ return ret;
}
static int cpu_pmu_pm_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
int cpu = smp_processor_id();
- struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
- struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
- if (!pmu)
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu) {
+ struct cpupmu_regs *pmuregs = &cpu_pmu->cpu_pmu_regs;
- if (action == CPU_PM_ENTER && pmu->save_regs) {
- pmu->save_regs(pmu, pmuregs);
- } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
- pmu->restore_regs(pmu, pmuregs);
+ if (action == CPU_PM_ENTER && pmu->save_regs)
+ pmu->save_regs(pmu, pmuregs);
+ else if (action == CPU_PM_EXIT && pmu->restore_regs)
+ pmu->restore_regs(pmu, pmuregs);
+
+ ret = NOTIFY_OK;
}
- return NOTIFY_OK;
+ return ret;
}
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
@@ -286,25 +338,100 @@ static int probe_current_pmu(struct arm_pmu *pmu)
return ret;
}
+static void cpu_pmu_free(struct arm_pmu *pmu)
+{
+ if (!pmu)
+ return;
+
+ free_percpu(pmu->cpu_pmus);
+ kfree(pmu);
+}
+
+/*
+ * HACK: Find a b.L switcher partner for CPU cpu on the specified cluster.
+ * This information should be obtained from an interface provided by the
+ * Switcher itself, if possible.
+ */
+#ifdef CONFIG_BL_SWITCHER
+static int bL_get_partner(int cpu, int cluster)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i) {
+ if (cpu_topology[i].thread_id == cpu_topology[cpu].thread_id &&
+ cpu_topology[i].core_id == cpu_topology[cpu].core_id &&
+ cpu_topology[i].socket_id == cluster)
+ return i;
+ }
+
+ return -1; /* no partner found */
+}
+#else
+static int bL_get_partner(int __always_unused cpu, int __always_unused cluster)
+{
+ return -1;
+}
+#endif
+
+static int find_irq(struct platform_device *pdev,
+ struct device_node *pmu_node,
+ struct device_node *cluster_node,
+ u32 mpidr)
+{
+ int irq = -1;
+ u32 cluster;
+ u32 core;
+ struct device_node *cores_node;
+ struct device_node *core_node = NULL;
+
+ if (of_property_read_u32(cluster_node, "reg", &cluster) ||
+ cluster != MPIDR_AFFINITY_LEVEL(mpidr, 1))
+ goto error;
+
+ cores_node = of_get_child_by_name(cluster_node, "cores");
+ if (!cores_node)
+ goto error;
+
+ for_each_child_of_node(cores_node, core_node)
+ if (!of_property_read_u32(core_node, "reg", &core) &&
+ core == MPIDR_AFFINITY_LEVEL(mpidr, 0))
+ break;
+
+ if (!core_node)
+ goto error;
+
+ irq = platform_get_irq(pdev, core);
+
+error:
+ of_node_put(core_node);
+ of_node_put(cores_node);
+ return irq;
+}
+
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
+ struct arm_cpu_pmu __percpu *cpu_pmus;
int ret = 0;
- int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
- if (!pmu) {
- pr_info("failed to allocate PMU device!");
- return -ENOMEM;
- }
+ if (!pmu)
+ goto error_nomem;
+
+ pmu->cpu_pmus = cpu_pmus = alloc_percpu(struct arm_cpu_pmu);
+ if (!cpu_pmus)
+ goto error_nomem;
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
struct device_node *ncluster;
int cluster = -1;
cpumask_t sibling_mask;
+ cpumask_t phys_sibling_mask;
+ unsigned int i;
ncluster = of_parse_phandle(node, "cluster", 0);
if (ncluster) {
@@ -315,11 +442,59 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
cluster = be32_to_cpup(hwid);
}
/* set sibling mask to all cpu mask if socket is not specified */
- if (cluster == -1 ||
+ /*
+ * In a switcher kernel, we affine all PMUs to CPUs and
+ * abstract the runtime presence/absence of PMUs at a lower
+ * level.
+ */
+ if (cluster == -1 || IS_ENABLED(CONFIG_BL_SWITCHER) ||
cluster_to_logical_mask(cluster, &sibling_mask))
- cpumask_setall(&sibling_mask);
+ cpumask_copy(&sibling_mask, cpu_possible_mask);
- smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+ if (bL_switcher_get_enabled())
+ /*
+ * The switcher initialises late now, so it should not
+ * have initialised yet:
+ */
+ BUG();
+
+ cpumask_copy(&phys_sibling_mask, cpu_possible_mask);
+
+ /*
+ * HACK: Deduce how the switcher will modify the topology
+ * in order to fill in PMU<->CPU combinations which don't
+ * make sense when the switcher is disabled. Ideally, this
+ * knowledge should come from the switcher somehow.
+ */
+ for_each_possible_cpu(i) {
+ int cpu = i;
+
+ per_cpu_ptr(cpu_pmus, i)->mpidr = -1;
+ per_cpu_ptr(cpu_pmus, i)->irq = -1;
+
+ if (cpu_topology[i].socket_id != cluster) {
+ cpumask_clear_cpu(i, &phys_sibling_mask);
+ cpu = bL_get_partner(i, cluster);
+ }
+
+ if (cpu == -1)
+ cpumask_clear_cpu(i, &sibling_mask);
+ else {
+ int irq = find_irq(pdev, node, ncluster,
+ cpu_logical_map(cpu));
+ per_cpu_ptr(cpu_pmus, i)->mpidr =
+ cpu_logical_map(cpu);
+ per_cpu_ptr(cpu_pmus, i)->irq = irq;
+ }
+ }
+
+ /*
+ * This relies on an MP view of the system to choose the right
+ * CPU to run init_fn:
+ */
+ smp_call_function_any(&phys_sibling_mask, init_fn, pmu, 1);
+
+ bL_switcher_put_enabled();
/* now set the valid_cpus after init */
cpumask_copy(&pmu->valid_cpus, &sibling_mask);
@@ -327,24 +502,26 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
ret = probe_current_pmu(pmu);
}
- if (ret) {
- pr_info("failed to probe PMU!");
- goto out_free;
- }
-
- for_each_cpu_mask(cpu, pmu->valid_cpus)
- per_cpu(cpu_pmu, cpu) = pmu;
+ if (ret)
+ goto error;
pmu->plat_device = pdev;
cpu_pmu_init(pmu);
ret = armpmu_register(pmu, -1);
- if (!ret)
- return 0;
+ if (ret)
+ goto error;
-out_free:
- pr_info("failed to register PMU devices!");
- kfree(pmu);
+ list_add(&pmu->class_pmus_list, &cpu_pmus_list);
+ goto out;
+
+error_nomem:
+ pr_warn("out of memory\n");
+ ret = -ENOMEM;
+error:
+ pr_warn("failed to register PMU device(s)!\n");
+ cpu_pmu_free(pmu);
+out:
return ret;
}
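
For context, the cpu_for_each_pmu() macro introduced above builds on a for_each_pmu() iterator that is not defined in this file (it belongs to <asm/pmu.h> in this series). A minimal sketch of its assumed shape, given that cpu_pmu_device_probe() links each registered PMU onto cpu_pmus_list via its class_pmus_list member:

/*
 * Sketch only -- assumed definition, not part of this diff:
 * walk every PMU class instance linked onto the given list.
 */
#define for_each_pmu(pmu, head) \
	list_for_each_entry(pmu, head, class_pmus_list)

Under that assumption, cpu_for_each_pmu(pmu, cpu_pmu, cpu) expands to a list walk that binds both the struct arm_pmu and its per-CPU struct arm_cpu_pmu slot, entering the loop body only when that slot's ->valid flag is set; this is what lets __cpu_find_any_pmu() return the first PMU usable on a given CPU.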