author     Mark Brown <broonie@sirena.org.uk>	2013-06-13 16:36:56 +0100
committer  Mark Brown <broonie@sirena.org.uk>	2013-06-13 16:36:56 +0100
commit     8e1adb79ff00c3840233f65a460312c98c7bf4cb (patch)
tree       bead4a3dae2f064d9d622228e3022cf9b4b36606
parent     7e1a54fe7dd79d8ccc86397c19e8bfdab21f0ca6 (diff)
parent     90d0e0a125f44ead5ae47e5c8cf9d2c72cda1c6c (diff)
Automatically merging tracking-lsk-vexpress-iks into merge-manifest
Conflicting files:
-rw-r--r--  Documentation/cpu-freq/cpufreq-arm-bl.txt    47
-rw-r--r--  arch/arm/common/bL_switcher.c               223
-rw-r--r--  arch/arm/configs/vexpress_bL_defconfig      157
-rw-r--r--  arch/arm/include/asm/bL_switcher.h            2
-rw-r--r--  arch/arm/include/asm/pmu.h                   27
-rw-r--r--  arch/arm/kernel/perf_event.c                 18
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c            335
-rw-r--r--  arch/arm/kernel/perf_event_v6.c              12
-rw-r--r--  arch/arm/kernel/perf_event_v7.c             466
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c          20
-rw-r--r--  drivers/bus/arm-cci.c                        13
-rw-r--r--  drivers/clk/versatile/clk-vexpress-spc.c      2
-rw-r--r--  drivers/cpufreq/Kconfig.arm                  21
-rw-r--r--  drivers/cpufreq/Makefile                      3
-rw-r--r--  drivers/cpufreq/arm-bl-cpufreq.c            270
-rw-r--r--  drivers/cpufreq/arm-bl-cpufreq.h             37
-rw-r--r--  drivers/cpufreq/arm-bl-cpufreq_tests.c      652
-rw-r--r--  drivers/cpufreq/arm_big_little.c            378
-rw-r--r--  drivers/cpufreq/cpufreq_stats.c              49
-rw-r--r--  linaro/configs/big-LITTLE-IKS.conf            5
20 files changed, 2393 insertions, 344 deletions
diff --git a/Documentation/cpu-freq/cpufreq-arm-bl.txt b/Documentation/cpu-freq/cpufreq-arm-bl.txt
new file mode 100644
index 00000000000..52e2f3ad761
--- /dev/null
+++ b/Documentation/cpu-freq/cpufreq-arm-bl.txt
@@ -0,0 +1,47 @@
+Synchronous cluster switching interface for the ARM big.LITTLE switcher
+-----------------------------------------------------------------------
+
+The arm-bl-cpufreq driver provides a simple interface which models two
+clusters as two performance points.
+
+Within each CPU's cpufreq directory in sysfs
+(/sys/devices/system/cpu/cpu?/cpufreq/):
+
+cpuinfo_max_freq:
+
+ reports the dummy frequency value which corresponds to the "big"
+ cluster.
+
+cpuinfo_min_freq:
+
+ reports the dummy frequency value which corresponds to the
+ "little" cluster.
+
+cpuinfo_cur_freq:
+
+ reports the dummy frequency corresponding to the currently
+ running cluster.
+
+
+To switch clusters, either the built-in "powersave" or "performance"
+governors can be used to force the "little" or "big" cluster
+respectively; or alternatively the "userspace" governor can be used.
+
+The following script fragment demonstrates how the userspace governor
+can be used to switch:
+
+
+for x in /sys/devices/system/cpu/cpu[0-9]*; do
+ echo userspace >$x/cpufreq/scaling_governor
+done
+
+big_freq=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq`
+little_freq=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
+
+switch_to_big () {
+ echo $big_freq >/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed
+}
+
+switch_to_little () {
+ echo $little_freq >/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed
+}
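An illustrative usage sketch (not part of the patched file): with the userspace governor selected and the helper functions from the fragment above defined in the same shell, a switch can be triggered and verified by reading cpuinfo_cur_freq. The expected values are assumptions based on the dummy-frequency model described earlier.

# Illustrative only: force the "big" cluster, then confirm the reported dummy frequency.
switch_to_big
cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq    # expected: $big_freq

# Force the "little" cluster and confirm again.
switch_to_little
cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq    # expected: $little_freq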
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 1506bf536d1..c3b9427f0a0 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -53,7 +53,7 @@ static int read_mpidr(void)
{
unsigned int id;
asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
- return id;
+ return id & MPIDR_HWID_BITMASK;
}
/*
@@ -72,16 +72,14 @@ static s64 get_ns(void)
static void bL_do_switch(void *_arg)
{
- unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
+ unsigned ib_mpidr, ib_cpu, ib_cluster;
long volatile handshake, **handshake_ptr = _arg;
pr_debug("%s\n", __func__);
- mpidr = read_mpidr();
- cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- ob_cluster = clusterid;
- ib_cluster = clusterid ^ 1;
+ ib_mpidr = cpu_logical_map(smp_processor_id());
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
/* Advertise our handshake location */
if (handshake_ptr) {
@@ -94,7 +92,7 @@ static void bL_do_switch(void *_arg)
* Our state has been saved at this point. Let's release our
* inbound CPU.
*/
- mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
sev();
/*
@@ -148,6 +146,7 @@ static int bL_switchpoint(unsigned long _arg)
*/
static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
/*
* bL_switch_to - Switch to a specific cluster for the current CPU
@@ -158,41 +157,46 @@ static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
*/
static int bL_switch_to(unsigned int new_cluster_id)
{
- unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
+ unsigned int mpidr, this_cpu, that_cpu;
+ unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
struct completion inbound_alive;
struct tick_device *tdev;
enum clock_event_mode tdev_mode;
long volatile *handshake_ptr;
int ipi_nr, ret;
- mpidr = read_mpidr();
- cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- ob_cluster = clusterid;
- ib_cluster = clusterid ^ 1;
+ this_cpu = smp_processor_id();
+ ob_mpidr = read_mpidr();
+ ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+ ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+ BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
- if (new_cluster_id == clusterid)
+ if (new_cluster_id == ob_cluster)
return 0;
- pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
+ that_cpu = bL_switcher_cpu_pairing[this_cpu];
+ ib_mpidr = cpu_logical_map(that_cpu);
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
- this_cpu = smp_processor_id();
+ pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+ this_cpu, ob_mpidr, ib_mpidr);
/* Close the gate for our entry vectors */
- mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
- mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
+ mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
/* Install our "inbound alive" notifier. */
init_completion(&inbound_alive);
ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
- ipi_nr |= ((1 << 16) << bL_gic_id[cpuid][ob_cluster]);
- mcpm_set_early_poke(cpuid, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+ ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
/*
* Let's wake up the inbound CPU now in case it requires some delay
* to come online, but leave it gated in our entry vector code.
*/
- ret = mcpm_cpu_power_up(cpuid, ib_cluster);
+ ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
if (ret) {
pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
return ret;
@@ -202,14 +206,14 @@ static int bL_switch_to(unsigned int new_cluster_id)
* Raise a SGI on the inbound CPU to make sure it doesn't stall
* in a possible WFI, such as in bL_power_down().
*/
- gic_send_sgi(bL_gic_id[cpuid][ib_cluster], 0);
+ gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
/*
* Wait for the inbound to come up. This allows for other
* tasks to be scheduled in the mean time.
*/
wait_for_completion(&inbound_alive);
- mcpm_set_early_poke(cpuid, ib_cluster, 0, 0);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
/*
* From this point we are entering the switch critical zone
@@ -217,10 +221,10 @@ static int bL_switch_to(unsigned int new_cluster_id)
*/
local_irq_disable();
local_fiq_disable();
- trace_cpu_migrate_begin(get_ns(), mpidr & MPIDR_HWID_BITMASK);
+ trace_cpu_migrate_begin(get_ns(), ob_mpidr);
/* redirect GIC's SGIs to our counterpart */
- gic_migrate_target(bL_gic_id[cpuid][ib_cluster]);
+ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
tdev = tick_get_device(this_cpu);
if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
@@ -237,13 +241,13 @@ static int bL_switch_to(unsigned int new_cluster_id)
panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
/*
- * Flip the cluster in the CPU logical map for this CPU.
+ * Swap the physical CPUs in the logical map for this logical CPU.
* This must be flushed to RAM as the resume code
* needs to access it while the caches are still disabled.
*/
- cpu_logical_map(this_cpu) ^= (1 << 8);
- __cpuc_flush_dcache_area(&cpu_logical_map(this_cpu),
- sizeof(cpu_logical_map(this_cpu)));
+ cpu_logical_map(this_cpu) = ib_mpidr;
+ cpu_logical_map(that_cpu) = ob_mpidr;
+ sync_cache_w(&cpu_logical_map(this_cpu));
/* Let's do the actual CPU switch. */
ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
@@ -252,10 +256,8 @@ static int bL_switch_to(unsigned int new_cluster_id)
/* We are executing on the inbound CPU at this point */
mpidr = read_mpidr();
- cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);
- BUG_ON(clusterid != ib_cluster);
+ pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+ BUG_ON(mpidr != ib_mpidr);
mcpm_cpu_powered_up();
@@ -267,7 +269,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
tdev->evtdev->next_event, 1);
}
- trace_cpu_migrate_finish(get_ns(), mpidr & MPIDR_HWID_BITMASK);
+ trace_cpu_migrate_finish(get_ns(), ib_mpidr);
local_fiq_enable();
local_irq_enable();
@@ -286,7 +288,7 @@ struct bL_thread {
struct completion started;
};
-static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
+static struct bL_thread bL_threads[NR_CPUS];
static int bL_switcher_thread(void *arg)
{
@@ -339,7 +341,7 @@ int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
{
struct bL_thread *t;
- if (cpu >= MAX_CPUS_PER_CLUSTER) {
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
return -EINVAL;
}
@@ -364,7 +366,7 @@ EXPORT_SYMBOL_GPL(bL_switch_request);
static DEFINE_MUTEX(bL_switcher_activation_lock);
static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
static unsigned int bL_switcher_active;
-static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER];
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
static cpumask_t bL_switcher_removed_logical_cpus;
int bL_switcher_register_notifier(struct notifier_block *nb)
@@ -400,52 +402,86 @@ static void bL_switcher_restore_cpus(void)
static int bL_switcher_halve_cpus(void)
{
- int cpu, cluster, i, ret;
- cpumask_t cluster_mask[2], common_mask;
-
- cpumask_clear(&bL_switcher_removed_logical_cpus);
- cpumask_clear(&cluster_mask[0]);
- cpumask_clear(&cluster_mask[1]);
+ int i, j, cluster_0, gic_id, ret;
+ unsigned int cpu, cluster, mask;
+ cpumask_t available_cpus;
+ /* First pass to validate what we have */
+ mask = 0;
for_each_online_cpu(i) {
- cpu = cpu_logical_map(i) & 0xff;
- cluster = (cpu_logical_map(i) >> 8) & 0xff;
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
if (cluster >= 2) {
pr_err("%s: only dual cluster systems are supported\n", __func__);
return -EINVAL;
}
- cpumask_set_cpu(cpu, &cluster_mask[cluster]);
+ if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+ return -EINVAL;
+ mask |= (1 << cluster);
}
-
- if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
- pr_err("%s: no common set of CPUs\n", __func__);
+ if (mask != 3) {
+ pr_err("%s: no CPU pairing possible\n", __func__);
return -EINVAL;
}
- for_each_online_cpu(i) {
- cpu = cpu_logical_map(i) & 0xff;
- cluster = (cpu_logical_map(i) >> 8) & 0xff;
-
- if (cpumask_test_cpu(cpu, &common_mask)) {
- /* Let's take note of the GIC ID for this CPU */
- int gic_id = gic_get_cpu_id(i);
- if (gic_id < 0) {
- pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
- return -EINVAL;
- }
- bL_gic_id[cpu][cluster] = gic_id;
- pr_info("GIC ID for CPU %u cluster %u is %u\n",
- cpu, cluster, gic_id);
-
+ /*
+ * Now let's do the pairing. We match each CPU with another CPU
+ * from a different cluster. To get a uniform scheduling behavior
+ * without fiddling with CPU topology and compute capacity data,
+ * we'll use logical CPUs initially belonging to the same cluster.
+ */
+ memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+ cpumask_copy(&available_cpus, cpu_online_mask);
+ cluster_0 = -1;
+ for_each_cpu(i, &available_cpus) {
+ int match = -1;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster_0 == -1)
+ cluster_0 = cluster;
+ if (cluster != cluster_0)
+ continue;
+ cpumask_clear_cpu(i, &available_cpus);
+ for_each_cpu(j, &available_cpus) {
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
/*
- * We keep only those logical CPUs which number
- * is equal to their physical CPU number. This is
- * not perfect but good enough in most cases.
+ * Let's remember the last match to create "odd"
+ * pairing on purpose in order for other code not
+ * to assume any relation between physical and
+ * logical CPU numbers.
*/
- if (cpu == i) {
- bL_switcher_cpu_original_cluster[cpu] = cluster;
- continue;
- }
+ if (cluster != cluster_0)
+ match = j;
+ }
+ if (match != -1) {
+ bL_switcher_cpu_pairing[i] = match;
+ cpumask_clear_cpu(match, &available_cpus);
+ pr_info("CPU%d paired with CPU%d\n", i, match);
+ }
+ }
+
+ /*
+ * Now we disable the unwanted CPUs i.e. everything that has no
+ * pairing information (that includes the pairing counterparts).
+ */
+ cpumask_clear(&bL_switcher_removed_logical_cpus);
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+ /* Let's take note of the GIC ID for this CPU */
+ gic_id = gic_get_cpu_id(i);
+ if (gic_id < 0) {
+ pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+ bL_switcher_restore_cpus();
+ return -EINVAL;
+ }
+ bL_gic_id[cpu][cluster] = gic_id;
+ pr_info("GIC ID for CPU %u cluster %u is %u\n",
+ cpu, cluster, gic_id);
+
+ if (bL_switcher_cpu_pairing[i] != -1) {
+ bL_switcher_cpu_original_cluster[i] = cluster;
+ continue;
}
ret = cpu_down(i);
@@ -459,9 +495,29 @@ static int bL_switcher_halve_cpus(void)
return 0;
}
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+ int cpu;
+
+ if (!bL_switcher_active)
+ return -EUNATCH;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ for_each_online_cpu(cpu) {
+ int pairing = bL_switcher_cpu_pairing[cpu];
+ if (pairing == -1)
+ continue;
+ if ((mpidr == cpu_logical_map(cpu)) ||
+ (mpidr == cpu_logical_map(pairing)))
+ return cpu;
+ }
+ return -EINVAL;
+}
+
static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
{
- trace_cpu_migrate_current(get_ns(), read_mpidr() & MPIDR_HWID_BITMASK);
+ trace_cpu_migrate_current(get_ns(), read_mpidr());
}
int bL_switcher_trace_trigger(void)
@@ -530,7 +586,7 @@ out:
static void bL_switcher_disable(void)
{
- unsigned int cpu, cluster, i;
+ unsigned int cpu, cluster;
struct bL_thread *t;
struct task_struct *task;
@@ -556,15 +612,14 @@ static void bL_switcher_disable(void)
* possibility for interference from external requests.
*/
for_each_online_cpu(cpu) {
- BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
t = &bL_threads[cpu];
task = t->task;
t->task = NULL;
- if (IS_ERR_OR_NULL(task))
+ if (!task || IS_ERR(task))
continue;
kthread_stop(task);
/* no more switch may happen on this CPU at this point */
- cluster = (cpu_logical_map(cpu) >> 8) & 0xff;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
if (cluster == bL_switcher_cpu_original_cluster[cpu])
continue;
init_completion(&t->started);
@@ -573,21 +628,17 @@ static void bL_switcher_disable(void)
if (!IS_ERR(task)) {
wait_for_completion(&t->started);
kthread_stop(task);
- cluster = (cpu_logical_map(cpu) >> 8) & 0xff;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
if (cluster == bL_switcher_cpu_original_cluster[cpu])
continue;
}
/* If execution gets here, we're in trouble. */
pr_crit("%s: unable to restore original cluster for CPU %d\n",
__func__, cpu);
- for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
- if ((cpu_logical_map(i) & 0xff) != cpu)
- continue;
- pr_crit("%s: CPU %d can't be restored\n",
- __func__, i);
- cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
- break;
- }
+ pr_crit("%s: CPU %d can't be restored\n",
+ __func__, bL_switcher_cpu_pairing[cpu]);
+ cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+ &bL_switcher_removed_logical_cpus);
}
bL_switcher_restore_cpus();
diff --git a/arch/arm/configs/vexpress_bL_defconfig b/arch/arm/configs/vexpress_bL_defconfig
new file mode 100644
index 00000000000..0d18cbd3f7c
--- /dev/null
+++ b/arch/arm/configs/vexpress_bL_defconfig
@@ -0,0 +1,157 @@
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA is not set
+CONFIG_ARCH_VEXPRESS_TC2=y
+CONFIG_ARCH_VEXPRESS_DCSCB=y
+CONFIG_ARM_ERRATA_720789=y
+CONFIG_PL310_ERRATA_753970=y
+CONFIG_SMP=y
+CONFIG_HAVE_ARM_ARCH_TIMER=y
+CONFIG_BIG_LITTLE=y
+CONFIG_BL_SWITCHER=y
+CONFIG_BL_SWITCHER_DUMMY_IF=y
+CONFIG_NR_CPUS=8
+CONFIG_AEABI=y
+# CONFIG_OABI_COMPAT is not set
+CONFIG_HIGHMEM=y
+CONFIG_HIGHPTE=y
+# CONFIG_COMPACTION is not set
+# CONFIG_ATAGS is not set
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
+CONFIG_CMDLINE="console=ttyAMA0,38400"
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y
+CONFIG_CPU_IDLE=y
+CONFIG_VFP=y
+CONFIG_NEON=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_SMSC911X=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
+CONFIG_LEGACY_PTY_COUNT=16
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_SENSORS_VEXPRESS=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_VEXPRESS=y
+CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
+CONFIG_FB_ARMHDLCD=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_ARMAACI=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_LOGITECH_DJ=m
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOGGER=y
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_ANDROID_INTF_ALARM_DEV=y
+CONFIG_EXT4_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_FTRACE is not set
+CONFIG_DEBUG_USER=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index d60e77d179a..ebf8d9872a6 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -47,6 +47,7 @@ bool bL_switcher_get_enabled(void);
void bL_switcher_put_enabled(void);
int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
#else
static inline int bL_switcher_register_notifier(struct notifier_block *nb)
@@ -62,6 +63,7 @@ static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
static inline bool bL_switcher_get_enabled(void) { return false; }
static inline void bL_switcher_put_enabled(void) { }
static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
#endif /* CONFIG_BL_SWITCHER */
#endif
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 0cd7824ca76..a7eaad37497 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -13,7 +13,9 @@
#define __ARM_PMU_H__
#include <linux/interrupt.h>
+#include <linux/percpu.h>
#include <linux/perf_event.h>
+#include <linux/types.h>
/*
* struct arm_pmu_platdata - ARM PMU platform data
@@ -71,6 +73,21 @@ struct cpupmu_regs {
u32 pmxevtcnt[8];
};
+struct arm_cpu_pmu {
+ bool valid;
+ bool active;
+
+ u32 mpidr;
+ int irq;
+
+ struct perf_event *hw_events[ARMPMU_MAX_HWEVENTS];
+ unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+ struct pmu_hw_events cpu_hw_events;
+ struct cpupmu_regs cpu_pmu_regs;
+
+ void *logical_state;
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
@@ -93,16 +110,24 @@ struct arm_pmu {
int (*map_event)(struct perf_event *event);
void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
+ void (*cpu_init)(struct arm_pmu *, struct arm_cpu_pmu *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
u64 max_period;
struct platform_device *plat_device;
- struct pmu_hw_events *(*get_hw_events)(void);
+ struct pmu_hw_events *(*get_hw_events)(struct arm_pmu *);
+
+ struct list_head class_pmus_list;
+ struct arm_cpu_pmu __percpu *cpu_pmus;
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+#define for_each_pmu(pmu, head) list_for_each_entry(pmu, head, class_pmus_list)
+
+#define to_this_cpu_pmu(arm_pmu) this_cpu_ptr((arm_pmu)->cpu_pmus)
+
extern const struct dev_pm_ops armpmu_dev_pm_ops;
int armpmu_register(struct arm_pmu *armpmu, int type);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index d847c622a7b..3d753cc4aa0 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -205,7 +205,7 @@ static void
armpmu_del(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -223,7 +223,7 @@ static int
armpmu_add(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
@@ -467,8 +467,14 @@ static int armpmu_event_init(struct perf_event *event)
static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
+ int enabled;
+
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
+ BUG_ON(!hw_events->used_mask); /* TEMPORARY */
+ enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
if (enabled)
armpmu->start(armpmu);
@@ -477,6 +483,10 @@ static void armpmu_enable(struct pmu *pmu)
static void armpmu_disable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu->stop(armpmu);
}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 0b48a38e3cf..b3ae24f6afa 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,26 +19,40 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/of.h>
+#include <linux/percpu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <asm/bL_switcher.h>
#include <asm/cputype.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
-/* Set at runtime when we know what CPU type we are. */
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
+static LIST_HEAD(cpu_pmus_list);
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+#define cpu_for_each_pmu(pmu, cpu_pmu, cpu) \
+ for_each_pmu(pmu, &cpu_pmus_list) \
+ if (((cpu_pmu) = per_cpu_ptr((pmu)->cpu_pmus, cpu))->valid)
-static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+static struct arm_pmu *__cpu_find_any_pmu(unsigned int cpu)
+{
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu)
+ return pmu;
+
+ return NULL;
+}
/*
* Despite the names, these two functions are CPU-specific and are used
@@ -46,7 +60,7 @@ static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
*/
const char *perf_pmu_name(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return NULL;
@@ -56,7 +70,7 @@ EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return 0;
@@ -70,51 +84,73 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
#include "perf_event_v6.c"
#include "perf_event_v7.c"
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(struct arm_pmu *pmu)
{
- return &__get_cpu_var(cpu_hw_events);
+ return &this_cpu_ptr(pmu->cpu_pmus)->cpu_hw_events;
+}
+
+static int find_logical_cpu(u32 mpidr)
+{
+ int cpu = bL_switcher_get_logical_index(mpidr);
+
+ if (cpu != -EUNATCH)
+ return cpu;
+
+ return get_logical_index(mpidr);
}
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_free_irq(struct arm_pmu *pmu)
{
- int i, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int i;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ if (cpu_pmu->mpidr == -1)
+ continue;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0)
+ continue;
- for (i = 0; i < irqs; ++i) {
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+ if (!cpumask_test_and_clear_cpu(cpu, &pmu->active_irqs))
continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
- free_irq(irq, cpu_pmu);
+ if (cpu_pmu->irq >= 0)
+ free_irq(cpu_pmu->irq, pmu);
}
}
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+static int cpu_pmu_request_irq(struct arm_pmu *pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
- if (!pmu_device)
- return -ENODEV;
+ irqs = 0;
+ for_each_possible_cpu(i)
+ if (per_cpu_ptr(pmu->cpu_pmus, i))
+ ++irqs;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
if (irqs < 1) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
- for (i = 0; i < irqs; ++i) {
- err = 0;
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- irq = platform_get_irq(pmu_device, i);
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ irq = cpu_pmu->irq;
if (irq < 0)
continue;
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0 || cpu != i)
+ continue;
+
/*
* If we have a single PMU interrupt that we can't shift,
* assume that we're running on a uniprocessor machine and
@@ -122,41 +158,51 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
*/
if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
+ irq, cpu);
continue;
}
+ pr_debug("%s: requesting IRQ %d for CPU%d\n",
+ pmu->name, irq, cpu);
+
err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
- cpu_pmu);
+ pmu);
if (err) {
pr_err("unable to request IRQ%d for ARM PMU counters\n",
irq);
return err;
}
- cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &pmu->active_irqs);
}
return 0;
}
-static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_init(struct arm_pmu *pmu)
{
int cpu;
- for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
- struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
- events->events = per_cpu(hw_events, cpu);
- events->used_mask = per_cpu(used_mask, cpu);
+ for_each_cpu_mask(cpu, pmu->valid_cpus) {
+ struct arm_cpu_pmu *cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, cpu);
+ struct pmu_hw_events *events = &cpu_pmu->cpu_hw_events;
+
+ events->events = cpu_pmu->hw_events;
+ events->used_mask = cpu_pmu->used_mask;
raw_spin_lock_init(&events->pmu_lock);
+
+ if (pmu->cpu_init)
+ pmu->cpu_init(pmu, cpu_pmu);
+
+ cpu_pmu->valid = true;
}
- cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
- cpu_pmu->request_irq = cpu_pmu_request_irq;
- cpu_pmu->free_irq = cpu_pmu_free_irq;
+ pmu->get_hw_events = cpu_pmu_get_cpu_events;
+ pmu->request_irq = cpu_pmu_request_irq;
+ pmu->free_irq = cpu_pmu_free_irq;
/* Ensure the PMU has sane values out of reset. */
- if (cpu_pmu->reset)
- on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
+ if (pmu->reset)
+ on_each_cpu_mask(&pmu->valid_cpus, pmu->reset, pmu, 1);
}
/*
@@ -168,36 +214,42 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (pmu && pmu->reset)
- pmu->reset(pmu);
- else
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, (unsigned int)hcpu)
+ if (pmu->reset) {
+ pmu->reset(pmu);
+ ret = NOTIFY_OK;
+ }
- return NOTIFY_OK;
+ return ret;
}
static int cpu_pmu_pm_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
int cpu = smp_processor_id();
- struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
- struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
- if (!pmu)
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu) {
+ struct cpupmu_regs *pmuregs = &cpu_pmu->cpu_pmu_regs;
- if (action == CPU_PM_ENTER && pmu->save_regs) {
- pmu->save_regs(pmu, pmuregs);
- } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
- pmu->restore_regs(pmu, pmuregs);
+ if (action == CPU_PM_ENTER && pmu->save_regs)
+ pmu->save_regs(pmu, pmuregs);
+ else if (action == CPU_PM_EXIT && pmu->restore_regs)
+ pmu->restore_regs(pmu, pmuregs);
+
+ ret = NOTIFY_OK;
}
- return NOTIFY_OK;
+ return ret;
}
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
@@ -286,25 +338,100 @@ static int probe_current_pmu(struct arm_pmu *pmu)
return ret;
}
+static void cpu_pmu_free(struct arm_pmu *pmu)
+{
+ if (!pmu)
+ return;
+
+ free_percpu(pmu->cpu_pmus);
+ kfree(pmu);
+}
+
+/*
+ * HACK: Find a b.L switcher partner for CPU cpu on the specified cluster
+ * This information should be obtained from an interface provided by the
+ * Switcher itself, if possible.
+ */
+#ifdef CONFIG_BL_SWITCHER
+static int bL_get_partner(int cpu, int cluster)
+{
+ unsigned int i;
+
+
+ for_each_possible_cpu(i) {
+ if (cpu_topology[i].thread_id == cpu_topology[cpu].thread_id &&
+ cpu_topology[i].core_id == cpu_topology[cpu].core_id &&
+ cpu_topology[i].socket_id == cluster)
+ return i;
+ }
+
+ return -1; /* no partner found */
+}
+#else
+static int bL_get_partner(int __always_unused cpu, int __always_unused cluster)
+{
+ return -1;
+}
+#endif
+
+static int find_irq(struct platform_device *pdev,
+ struct device_node *pmu_node,
+ struct device_node *cluster_node,
+ u32 mpidr)
+{
+ int irq = -1;
+ u32 cluster;
+ u32 core;
+ struct device_node *cores_node;
+ struct device_node *core_node = NULL;
+
+ if (of_property_read_u32(cluster_node, "reg", &cluster) ||
+ cluster != MPIDR_AFFINITY_LEVEL(mpidr, 1))
+ goto error;
+
+ cores_node = of_get_child_by_name(cluster_node, "cores");
+ if (!cores_node)
+ goto error;
+
+ for_each_child_of_node(cores_node, core_node)
+ if (!of_property_read_u32(core_node, "reg", &core) &&
+ core == MPIDR_AFFINITY_LEVEL(mpidr, 0))
+ break;
+
+ if (!core_node)
+ goto error;
+
+ irq = platform_get_irq(pdev, core);
+
+error:
+ of_node_put(core_node);
+ of_node_put(cores_node);
+ return irq;
+}
+
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
+ struct arm_cpu_pmu __percpu *cpu_pmus;
int ret = 0;
- int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
- if (!pmu) {
- pr_info("failed to allocate PMU device!");
- return -ENOMEM;
- }
+ if (!pmu)
+ goto error_nomem;
+
+ pmu->cpu_pmus = cpu_pmus = alloc_percpu(struct arm_cpu_pmu);
+ if (!cpu_pmus)
+ goto error_nomem;
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
struct device_node *ncluster;
int cluster = -1;
cpumask_t sibling_mask;
+ cpumask_t phys_sibling_mask;
+ unsigned int i;
ncluster = of_parse_phandle(node, "cluster", 0);
if (ncluster) {
@@ -315,11 +442,59 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
cluster = be32_to_cpup(hwid);
}
/* set sibling mask to all cpu mask if socket is not specified */
- if (cluster == -1 ||
+ /*
+ * In a switcher kernel, we affine all PMUs to CPUs and
+ * abstract the runtime presence/absence of PMUs at a lower
+ * level.
+ */
+ if (cluster == -1 || IS_ENABLED(CONFIG_BL_SWITCHER) ||
cluster_to_logical_mask(cluster, &sibling_mask))
- cpumask_setall(&sibling_mask);
+ cpumask_copy(&sibling_mask, cpu_possible_mask);
- smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+ if (bL_switcher_get_enabled())
+ /*
+ * The switcher initialises late now, so it should not
+ * have initialised yet:
+ */
+ BUG();
+
+ cpumask_copy(&phys_sibling_mask, cpu_possible_mask);
+
+ /*
+ * HACK: Deduce how the switcher will modify the topology
+ * in order to fill in PMU<->CPU combinations which don't
+ * make sense when the switcher is disabled. Ideally, this
+	 * knowledge should come from the switcher somehow.
+ */
+ for_each_possible_cpu(i) {
+ int cpu = i;
+
+ per_cpu_ptr(cpu_pmus, i)->mpidr = -1;
+ per_cpu_ptr(cpu_pmus, i)->irq = -1;
+
+ if (cpu_topology[i].socket_id != cluster) {
+ cpumask_clear_cpu(i, &phys_sibling_mask);
+ cpu = bL_get_partner(i, cluster);
+ }
+
+ if (cpu == -1)
+ cpumask_clear_cpu(i, &sibling_mask);
+ else {
+ int irq = find_irq(pdev, node, ncluster,
+ cpu_logical_map(cpu));
+ per_cpu_ptr(cpu_pmus, i)->mpidr =
+ cpu_logical_map(cpu);
+ per_cpu_ptr(cpu_pmus, i)->irq = irq;
+ }
+ }
+
+ /*
+ * This relies on an MP view of the system to choose the right
+ * CPU to run init_fn:
+ */
+ smp_call_function_any(&phys_sibling_mask, init_fn, pmu, 1);
+
+ bL_switcher_put_enabled();
/* now set the valid_cpus after init */
cpumask_copy(&pmu->valid_cpus, &sibling_mask);
@@ -327,24 +502,26 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
ret = probe_current_pmu(pmu);
}
- if (ret) {
- pr_info("failed to probe PMU!");
- goto out_free;
- }
-
- for_each_cpu_mask(cpu, pmu->valid_cpus)
- per_cpu(cpu_pmu, cpu) = pmu;
+ if (ret)
+ goto error;
pmu->plat_device = pdev;
cpu_pmu_init(pmu);
ret = armpmu_register(pmu, -1);
- if (!ret)
- return 0;
+ if (ret)
+ goto error;
-out_free:
- pr_info("failed to register PMU devices!");
- kfree(pmu);
+ list_add(&pmu->class_pmus_list, &cpu_pmus_list);
+ goto out;
+
+error_nomem:
+ pr_warn("out of memory\n");
+ ret = -ENOMEM;
+error:
+ pr_warn("failed to register PMU device(s)!\n");
+ cpu_pmu_free(pmu);
+out:
return ret;
}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 03664b0e8fa..a191bdb9ebd 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -439,7 +439,7 @@ static void armv6pmu_enable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -477,7 +477,7 @@ armv6pmu_handle_irq(int irq_num,
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -533,7 +533,7 @@ armv6pmu_handle_irq(int irq_num,
static void armv6pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
@@ -545,7 +545,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu)
static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
@@ -586,7 +586,7 @@ static void armv6pmu_disable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -621,7 +621,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
unsigned long val, mask, flags, evt = 0;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 654db5030c3..25762a548f2 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -18,6 +18,175 @@
#ifdef CONFIG_CPU_V7
+struct armv7_pmu_logical_state {
+ u32 PMCR;
+ u32 PMCNTENSET;
+ u32 PMCNTENCLR;
+ u32 PMOVSR;
+ u32 PMSWINC;
+ u32 PMSELR;
+ u32 PMCEID0;
+ u32 PMCEID1;
+
+ u32 PMCCNTR;
+
+ u32 PMUSERENR;
+ u32 PMINTENSET;
+ u32 PMINTENCLR;
+ u32 PMOVSSET;
+
+ struct armv7_pmu_logical_cntr_state {
+ u32 PMXEVTYPER;
+ u32 PMXEVCNTR;
+ } cntrs[1]; /* we will grow this during allocation */
+};
+
+#define __v7_logical_state(cpupmu) \
+ ((struct armv7_pmu_logical_state *)(cpupmu)->logical_state)
+
+#define __v7_logical_state_single(cpupmu, name) \
+ __v7_logical_state(cpupmu)->name
+#define __v7_logical_state_cntr(cpupmu, name) \
+ __v7_logical_state(cpupmu)->cntrs[__v7_logical_state(cpupmu)->PMSELR].name
+
+#define __def_v7_pmu_reg_W(kind, name, op1, Cm, op2) \
+ static inline u32 __v7_pmu_write_physical_##name(u32 value) \
+ { \
+ asm volatile ( \
+ "mcr p15, " #op1 ", %0, c9, " #Cm ", " #op2 \
+ :: "r" (value) \
+ ); \
+ \
+ return value; \
+ } \
+ \
+ static inline u32 __v7_pmu_write_logical_##name( \
+ struct arm_cpu_pmu *cpupmu, u32 value) \
+ { \
+ __v7_logical_state_##kind(cpupmu, name) = value; \
+ return value; \
+ }
+
+#define __def_v7_pmu_reg_R(kind, name, op1, Cm, op2) \
+ static inline u32 __v7_pmu_read_physical_##name(void) \
+ { \
+ u32 result; \
+ \
+ asm volatile ( \
+ "mrc p15, " #op1 ", %0, c9, " #Cm ", " #op2 \
+ : "=r" (result) \
+ ); \
+ \
+ return result; \
+ } \
+ \
+ static inline u32 __v7_pmu_read_logical_##name( \
+ struct arm_cpu_pmu *cpupmu) \
+ { \
+ return __v7_logical_state_##kind(cpupmu, name); \
+ }
+
+#define __def_v7_pmu_reg_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_W(single, name, op1, Cm, op2)
+#define __def_v7_pmu_reg_RO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_R(single, name, op1, Cm, op2)
+
+#define __def_v7_pmu_reg_RW(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_RO(name, op1, Cm, op2)
+
+#define __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_W(cntr, name, op1, Cm, op2)
+#define __def_v7_pmu_cntr_RO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_R(cntr, name, op1, Cm, op2)
+
+#define __def_v7_pmu_cntr_RW(name, op1, Cm, op2) \
+ __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_cntr_RO(name, op1, Cm, op2)
+
+#define __def_v7_pmu_reg(name, prot, op1, Cm, op2) \
+ __def_v7_pmu_reg_##prot(name, op1, Cm, op2)
+#define __def_v7_pmu_cntr(name, prot, op1, Cm, op2) \
+ __def_v7_pmu_cntr_##prot(name, op1, Cm, op2)
+
+__def_v7_pmu_reg(PMCR, RW, 0, c12, 0)
+__def_v7_pmu_reg(PMCNTENSET, RW, 0, c12, 1)
+__def_v7_pmu_reg(PMCNTENCLR, RW, 0, c12, 2)
+__def_v7_pmu_reg(PMOVSR, RW, 0, c12, 3)
+__def_v7_pmu_reg(PMSWINC, WO, 0, c12, 4)
+__def_v7_pmu_reg(PMSELR, RW, 0, c12, 5)
+__def_v7_pmu_reg(PMCEID0, RO, 0, c12, 6)
+__def_v7_pmu_reg(PMCEID1, RO, 0, c12, 7)
+
+__def_v7_pmu_reg(PMCCNTR, RW, 0, c13, 0)
+__def_v7_pmu_cntr(PMXEVTYPER, RW, 0, c13, 1)
+__def_v7_pmu_cntr(PMXEVCNTR, RW, 0, c13, 2)
+
+__def_v7_pmu_reg(PMUSERENR, RW, 0, c14, 0)
+__def_v7_pmu_reg(PMINTENSET, RW, 0, c14, 1)
+__def_v7_pmu_reg(PMINTENCLR, RW, 0, c14, 2)
+__def_v7_pmu_reg(PMOVSSET, RW, 0, c14, 3)
+
+#define __v7_pmu_write_physical(name, value) \
+ __v7_pmu_write_physical_##name(value)
+#define __v7_pmu_read_physical(name) \
+ __v7_pmu_read_physical_##name()
+
+#define __v7_pmu_write_logical(cpupmu, name, value) \
+ __v7_pmu_write_logical_##name(cpupmu, value)
+#define __v7_pmu_read_logical(cpupmu, name) \
+ __v7_pmu_read_logical_##name(cpupmu)
+
+#define __v7_pmu_write_reg(cpupmu, name, value) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, value); \
+ else \
+ __v7_pmu_write_logical(cpupmu, name, value); \
+} while(0)
+
+#define __v7_pmu_read_reg(cpupmu, name) ( \
+ (cpupmu)->active ? \
+ __v7_pmu_read_physical(name) : \
+ __v7_pmu_read_logical(cpupmu, name) \
+)
+
+#define __v7_pmu_reg_set(cpupmu, name, logical_name, mask) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, mask); \
+ else { \
+ u32 __value; \
+		__value = __v7_pmu_read_logical(cpupmu, logical_name) | (mask); \
+ __v7_pmu_write_logical(cpupmu, logical_name, __value); \
+ } \
+} while(0)
+
+#define __v7_pmu_reg_clr(cpupmu, name, logical_name, mask) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, mask); \
+ else { \
+ u32 __value; \
+ __value = __v7_pmu_read_logical(cpupmu, logical_name) & ~(mask); \
+ __v7_pmu_write_logical(cpupmu, logical_name, __value); \
+ } \
+} while(0)
+
+#define __v7_pmu_save_reg(cpupmu, name) \
+ __v7_pmu_write_logical(cpupmu, name, \
+ __v7_pmu_read_physical(name))
+#define __v7_pmu_restore_reg(cpupmu, name) \
+ __v7_pmu_write_physical(name, \
+ __v7_pmu_read_logical(cpupmu, name))
+static u32 read_mpidr(void)
+{
+ u32 result;
+
+ asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (result));
+
+ return result;
+}
+
+static void armv7pmu_reset(void *info);
+
/*
* Common ARMv7 event types
*
@@ -784,18 +953,16 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
#define ARMV7_EXCLUDE_USER (1 << 30)
#define ARMV7_INCLUDE_HYP (1 << 27)
-static inline u32 armv7_pmnc_read(void)
+static inline u32 armv7_pmnc_read(struct arm_cpu_pmu *cpupmu)
{
- u32 val;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
- return val;
+ return __v7_pmu_read_reg(cpupmu, PMCR);
}
-static inline void armv7_pmnc_write(u32 val)
+static inline void armv7_pmnc_write(struct arm_cpu_pmu *cpupmu, u32 val)
{
val &= ARMV7_PMNC_MASK;
isb();
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+ __v7_pmu_write_reg(cpupmu, PMCR, val);
}
static inline int armv7_pmnc_has_overflowed(u32 pmnc)
@@ -814,10 +981,10 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
}
-static inline int armv7_pmnc_select_counter(int idx)
+static inline int armv7_pmnc_select_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
+ __v7_pmu_write_reg(cpupmu, PMSELR, counter);
isb();
return idx;
@@ -825,185 +992,197 @@ static inline int armv7_pmnc_select_counter(int idx)
static inline u32 armv7pmu_read_counter(struct perf_event *event)
{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
u32 value = 0;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+ if (!armv7_pmnc_counter_valid(pmu, idx))
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV7_IDX_CYCLE_COUNTER)
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
- else if (armv7_pmnc_select_counter(idx) == idx)
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+ value = __v7_pmu_read_reg(cpupmu, PMCCNTR);
+ else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+ value = __v7_pmu_read_reg(cpupmu, PMXEVCNTR);
return value;
}
static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+ if (!armv7_pmnc_counter_valid(pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV7_IDX_CYCLE_COUNTER)
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
- else if (armv7_pmnc_select_counter(idx) == idx)
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+ __v7_pmu_write_reg(cpupmu, PMCCNTR, value);
+ else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+ __v7_pmu_write_reg(cpupmu, PMXEVCNTR, value);
}
-static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(struct arm_cpu_pmu *cpupmu, int idx, u32 val)
{
- if (armv7_pmnc_select_counter(idx) == idx) {
+ if (armv7_pmnc_select_counter(cpupmu, idx) == idx) {
val &= ARMV7_EVTYPE_MASK;
- asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+ __v7_pmu_write_reg(cpupmu, PMXEVTYPER, val);
}
}
-static inline int armv7_pmnc_enable_counter(int idx)
+static inline int armv7_pmnc_enable_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
+ __v7_pmu_reg_set(cpupmu, PMCNTENSET, PMCNTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_disable_counter(int idx)
+static inline int armv7_pmnc_disable_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMCNTENCLR, PMCNTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_enable_intens(int idx)
+static inline int armv7_pmnc_enable_intens(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
+	__v7_pmu_reg_set(cpupmu, PMINTENSET, PMINTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_disable_intens(int idx)
+static inline int armv7_pmnc_disable_intens(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMINTENCLR, PMINTENSET, BIT(counter));
isb();
/* Clear the overflow flag in case an interrupt is pending. */
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, BIT(counter));
isb();
return idx;
}
-static inline u32 armv7_pmnc_getreset_flags(void)
+static inline u32 armv7_pmnc_getreset_flags(struct arm_cpu_pmu *cpupmu)
{
u32 val;
/* Read */
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+ val = __v7_pmu_read_reg(cpupmu, PMOVSR);
/* Write to clear flags */
val &= ARMV7_FLAG_MASK;
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+ __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, val);
return val;
}
#ifdef DEBUG
-static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+static void armv7_pmnc_dump_regs(struct arm_pmu *pmu)
{
u32 val;
unsigned int cnt;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
printk(KERN_INFO "PMNC registers dump:\n");
-
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
- printk(KERN_INFO "PMNC =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
- printk(KERN_INFO "CNTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
- printk(KERN_INFO "INTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
- printk(KERN_INFO "FLAGS =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
- printk(KERN_INFO "SELECT=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
- printk(KERN_INFO "CCNT =0x%08x\n", val);
+	printk(KERN_INFO "PMNC =0x%08x\n", __v7_pmu_read_reg(cpupmu, PMCR));
+	printk(KERN_INFO "CNTENS=0x%08x\n", __v7_pmu_read_reg(cpupmu, PMCNTENSET));
+	printk(KERN_INFO "INTENS=0x%08x\n", __v7_pmu_read_reg(cpupmu, PMINTENSET));
+	printk(KERN_INFO "FLAGS =0x%08x\n", __v7_pmu_read_reg(cpupmu, PMOVSR));
+	printk(KERN_INFO "SELECT=0x%08x\n", __v7_pmu_read_reg(cpupmu, PMSELR));
+	printk(KERN_INFO "CCNT =0x%08x\n", __v7_pmu_read_reg(cpupmu, PMCCNTR));
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
- asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+ ARMV7_IDX_TO_COUNTER(cnt),
+ __v7_pmu_read_reg(cpupmu, PMXEVCNTR));
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
+ ARMV7_IDX_TO_COUNTER(cnt),
+ __v7_pmu_read_reg(cpupmu, PMXEVTYPER));
}
}
#endif
-static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+static void armv7pmu_save_regs(struct arm_pmu *pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
- if (!(regs->pmc & ARMV7_PMNC_E))
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+
+ if (!cpupmu->active)
return;
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
- asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+ if (!*cpupmu->cpu_hw_events.used_mask)
+ return;
+
+	if (!(__v7_pmu_save_reg(cpupmu, PMCR) & ARMV7_PMNC_E))
+ return;
+
+ __v7_pmu_save_reg(cpupmu, PMCNTENSET);
+ __v7_pmu_save_reg(cpupmu, PMUSERENR);
+ __v7_pmu_save_reg(cpupmu, PMINTENSET);
+ __v7_pmu_save_reg(cpupmu, PMCCNTR);
+
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 1"
- : "=r"(regs->pmxevttype[cnt]));
- asm volatile("mrc p15, 0, %0, c9, c13, 2"
- : "=r"(regs->pmxevtcnt[cnt]));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
+ __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */
+ __v7_pmu_save_reg(cpupmu, PMXEVTYPER);
+ __v7_pmu_save_reg(cpupmu, PMXEVCNTR);
}
return;
}
-static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+/* armv7pmu_reset() must be called before calling this function */
+static void armv7pmu_restore_regs(struct arm_pmu *pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
- if (!(regs->pmc & ARMV7_PMNC_E))
+ u32 pmcr;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+
+ armv7pmu_reset(pmu);
+
+ if (!cpupmu->active)
return;
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
- asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+ if (!*cpupmu->cpu_hw_events.used_mask)
+ return;
+
+ pmcr = __v7_pmu_read_logical(cpupmu, PMCR);
+	if (!(pmcr & ARMV7_PMNC_E))
+ return;
+
+ __v7_pmu_restore_reg(cpupmu, PMCNTENSET);
+ __v7_pmu_restore_reg(cpupmu, PMUSERENR);
+ __v7_pmu_restore_reg(cpupmu, PMINTENSET);
+ __v7_pmu_restore_reg(cpupmu, PMCCNTR);
+
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mcr p15, 0, %0, c9, c13, 1"
- : : "r"(regs->pmxevttype[cnt]));
- asm volatile("mcr p15, 0, %0, c9, c13, 2"
- : : "r"(regs->pmxevtcnt[cnt]));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
+ __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */
+ __v7_pmu_restore_reg(cpupmu, PMXEVTYPER);
+ __v7_pmu_restore_reg(cpupmu, PMXEVCNTR);
}
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+ __v7_pmu_write_reg(cpupmu, PMCR, pmcr);
}
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ if (!armv7_pmnc_counter_valid(pmu, idx)) {
pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return;
@@ -1018,25 +1197,25 @@ static void armv7pmu_enable_event(struct perf_event *event)
/*
* Disable counter
*/
- armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
/*
* Set event (if destined for PMNx counters)
* We only need to set the event for the cycle counter if we
* have the ability to perform event filtering.
*/
- if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
- armv7_pmnc_write_evtsel(idx, hwc->config_base);
+ if (pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(cpupmu, idx, hwc->config_base);
/*
* Enable interrupt for this counter
*/
- armv7_pmnc_enable_intens(idx);
+ armv7_pmnc_enable_intens(cpupmu, idx);
/*
* Enable counter
*/
- armv7_pmnc_enable_counter(idx);
+	armv7_pmnc_enable_counter(cpupmu, idx);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1045,11 +1224,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ if (!armv7_pmnc_counter_valid(pmu, idx)) {
pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return;
@@ -1063,12 +1243,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
/*
* Disable counter
*/
- armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
/*
* Disable interrupt for this counter
*/
- armv7_pmnc_disable_intens(idx);
+ armv7_pmnc_disable_intens(cpupmu, idx);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1077,15 +1257,23 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
u32 pmnc;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = (struct arm_pmu *)dev;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *cpuc = pmu->get_hw_events(pmu);
struct pt_regs *regs;
int idx;
+ if (!cpupmu->active) {
+ pr_warn_ratelimited("%s: Spurious interrupt for inactive PMU %s: event counts will be wrong.\n",
+ __func__, pmu->name);
+ pr_warn_once("This is a known interrupt affinity bug in the b.L switcher perf support.\n");
+ return IRQ_NONE;
+ }
+
/*
* Get and reset the IRQ flags
*/
- pmnc = armv7_pmnc_getreset_flags();
+ pmnc = armv7_pmnc_getreset_flags(cpupmu);
/*
* Did an overflow occur?
@@ -1098,7 +1286,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
*/
regs = get_irq_regs();
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ for (idx = 0; idx < pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
@@ -1120,7 +1308,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
continue;
if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ pmu->disable(event);
}
/*
@@ -1135,25 +1323,27 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
return IRQ_HANDLED;
}
-static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+static void armv7pmu_start(struct arm_pmu *pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
- armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+ armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) | ARMV7_PMNC_E);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+static void armv7pmu_stop(struct arm_pmu *pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
- armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+ armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) & ~ARMV7_PMNC_E);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1212,19 +1402,33 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
+static bool check_active(struct arm_cpu_pmu *cpupmu)
+{
+ u32 mpidr = read_mpidr();
+
+ BUG_ON(!(mpidr & 0x80000000)); /* this won't work on uniprocessor */
+
+ cpupmu->active = ((mpidr ^ cpupmu->mpidr) & 0xFFFFFF) == 0;
+ return cpupmu->active;
+}
+
static void armv7pmu_reset(void *info)
{
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
+ struct arm_pmu *pmu = (struct arm_pmu *)info;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ u32 idx, nb_cnt = pmu->num_events;
+
+ if (!check_active(cpupmu))
+ return;
/* The counter and interrupt enable registers are unknown at reset. */
for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv7_pmnc_disable_counter(idx);
- armv7_pmnc_disable_intens(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
+ armv7_pmnc_disable_intens(cpupmu, idx);
}
/* Initialize & Reset PMNC: C and P bits */
- armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+ armv7_pmnc_write(cpupmu, ARMV7_PMNC_P | ARMV7_PMNC_C);
}
static int armv7_a8_map_event(struct perf_event *event)
@@ -1257,8 +1461,13 @@ static int armv7_a7_map_event(struct perf_event *event)
&armv7_a7_perf_cache_map, 0xFF);
}
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+ struct arm_cpu_pmu *cpupmu);
+
static void armv7pmu_init(struct arm_pmu *cpu_pmu)
{
+ struct arm_cpu_pmu *cpu_pmus = cpu_pmu->cpu_pmus;
+
cpu_pmu->handle_irq = armv7pmu_handle_irq;
cpu_pmu->enable = armv7pmu_enable_event;
cpu_pmu->disable = armv7pmu_disable_event;
@@ -1270,7 +1479,10 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->reset = armv7pmu_reset;
cpu_pmu->save_regs = armv7pmu_save_regs;
cpu_pmu->restore_regs = armv7pmu_restore_regs;
+ cpu_pmu->cpu_init = armv7pmu_cpu_init;
cpu_pmu->max_period = (1LLU << 32) - 1;
+
+ cpu_pmu->cpu_pmus = cpu_pmus;
};
static u32 armv7_read_num_pmnc_events(void)
@@ -1278,12 +1490,38 @@ static u32 armv7_read_num_pmnc_events(void)
u32 nb_cnt;
/* Read the nb of CNTx counters supported from PMNC */
- nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+ nb_cnt = (__v7_pmu_read_physical(PMCR) >> ARMV7_PMNC_N_SHIFT);
+ nb_cnt &= ARMV7_PMNC_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
}
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+ struct arm_cpu_pmu *cpupmu)
+{
+ size_t size = offsetof(struct armv7_pmu_logical_state, cntrs) +
+ pmu->num_events * sizeof(*__v7_logical_state(cpupmu));
+
+ cpupmu->logical_state = kzalloc(size, GFP_KERNEL);
+
+ /*
+ * We need a proper error return mechanism for these init functions.
+ * Until then, panicking the kernel is acceptable, since a failure
+ * here is indicative of crippling memory contstraints which will
+	 * here is indicative of crippling memory constraints which will
+ */
+ BUG_ON(!cpupmu->logical_state);
+
+ /*
+ * Save the "read-only" ID registers in logical_state.
+ * Because they are read-only, there are no direct accessors,
+ * so poke them directly into the logical_state structure:
+ */
+ __v7_logical_state(cpupmu)->PMCEID0 = __v7_pmu_read_physical(PMCEID0);
+ __v7_logical_state(cpupmu)->PMCEID1 = __v7_pmu_read_physical(PMCEID1);
+}
+
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
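
Note: the check_active() helper added above decides whether the calling CPU currently owns this PMU instance by comparing the low three affinity fields of its MPIDR against the MPIDR recorded for the PMU. A minimal standalone sketch of that comparison follows; pmu_is_active() and the example MPIDR values are hypothetical stand-ins, not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Affinity levels 0-2 live in MPIDR[23:0]; bit 31 reads as 1 on MP systems. */
#define MPIDR_AFFINITY_MASK 0x00FFFFFFu

/* Return 1 when the running CPU's MPIDR matches the PMU's recorded MPIDR. */
static int pmu_is_active(uint32_t cpu_mpidr, uint32_t pmu_mpidr)
{
	return ((cpu_mpidr ^ pmu_mpidr) & MPIDR_AFFINITY_MASK) == 0;
}

int main(void)
{
	uint32_t a15_cpu0 = 0x80000000;	/* cluster 0, cpu 0 (hypothetical) */
	uint32_t a7_cpu0  = 0x80000100;	/* cluster 1, cpu 0 (hypothetical) */

	printf("same CPU:  %d\n", pmu_is_active(a15_cpu0, a15_cpu0));	/* 1 */
	printf("other CPU: %d\n", pmu_is_active(a15_cpu0, a7_cpu0));	/* 0 */
	return 0;
}
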
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 63990c42fac..cd670eafbb5 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -225,7 +225,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
unsigned long pmnc;
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -285,7 +285,7 @@ static void xscale1pmu_enable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
switch (idx) {
@@ -321,7 +321,7 @@ static void xscale1pmu_disable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
switch (idx) {
@@ -374,7 +374,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
@@ -386,7 +386,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
@@ -572,7 +572,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
unsigned long pmnc, of_flags;
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -626,7 +626,7 @@ static void xscale2pmu_enable_event(struct perf_event *event)
unsigned long flags, ien, evtsel;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -672,7 +672,7 @@ static void xscale2pmu_disable_event(struct perf_event *event)
unsigned long flags, ien, evtsel, of_flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -738,7 +738,7 @@ out:
static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
@@ -750,7 +750,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc();
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index b110645bc56..eee1c5722fd 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -156,7 +156,8 @@ static u32 cci_pmu_get_max_counters(void)
return n_cnts + 1;
}
-static struct pmu_hw_events *cci_pmu_get_hw_events(void)
+static struct pmu_hw_events *cci_pmu_get_hw_events(
+ struct arm_pmu *__always_unused pmu)
{
return &cci_hw_events;
}
@@ -233,7 +234,7 @@ static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev)
{
struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct perf_sample_data data;
struct pt_regs *regs;
int idx;
@@ -285,7 +286,7 @@ static void cci_pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
@@ -309,7 +310,7 @@ static void cci_pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
@@ -330,7 +331,7 @@ static void cci_pmu_start(struct arm_pmu *cci_pmu)
u32 val;
unsigned long flags;
struct cci_drvdata *info = platform_get_drvdata(cci_pmu->plat_device);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
@@ -346,7 +347,7 @@ static void cci_pmu_stop(struct arm_pmu *cci_pmu)
u32 val;
unsigned long flags;
struct cci_drvdata *info = platform_get_drvdata(cci_pmu->plat_device);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c
index d3f8fb44cca..f35b70a0e68 100644
--- a/drivers/clk/versatile/clk-vexpress-spc.c
+++ b/drivers/clk/versatile/clk-vexpress-spc.c
@@ -102,7 +102,7 @@ struct clk *vexpress_clk_register_spc(const char *name, int cluster_id)
#if defined(CONFIG_OF)
void __init vexpress_clk_of_register_spc(void)
{
- char name[9] = "cluster";
+ char name[9] = "clusterX";
struct device_node *node = NULL;
struct clk *clk;
const u32 *val;
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 6ef9c7b0691..204812a5b92 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -2,6 +2,27 @@
# ARM CPU Frequency scaling drivers
#
+config ARM_BL_CPUFREQ
+ depends on EXPERIMENTAL
+ depends on BL_SWITCHER
+ tristate "Simple cpufreq interface for the ARM big.LITTLE switcher"
+ help
+ Provides a simple cpufreq interface to control the ARM
+ big.LITTLE switcher.
+
+ Refer to Documentation/cpu-freq/cpufreq-arm-bl.txt for details.
+
+ If unsure, say N.
+
+config ARM_BL_CPUFREQ_TEST
+ depends on ARM_BL_CPUFREQ
+ bool "Unit testing on cpufreq interface for the ARM big.LITTLE switcher"
+ help
+	  Run unit tests on the cpufreq interface for the ARM big.LITTLE
+	  switcher before registering the driver.
+
+ If unsure, say N.
+
config ARM_OMAP2PLUS_CPUFREQ
bool "TI OMAP2+"
depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 6148eac24fc..3bcc6a11589 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -44,7 +44,8 @@ obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o
##################################################################################
# ARM SoC drivers
-obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o
+obj-$(CONFIG_UX500_SOC_DB8500) += db8500-cpufreq.o
+obj-$(CONFIG_ARM_BL_CPUFREQ) += arm-bl-cpufreq.o
obj-$(CONFIG_ARM_S3C2416_CPUFREQ) += s3c2416-cpufreq.o
obj-$(CONFIG_ARM_S3C64XX_CPUFREQ) += s3c64xx-cpufreq.o
obj-$(CONFIG_ARM_S5PV210_CPUFREQ) += s5pv210-cpufreq.o
diff --git a/drivers/cpufreq/arm-bl-cpufreq.c b/drivers/cpufreq/arm-bl-cpufreq.c
new file mode 100644
index 00000000000..bc633f2d7b0
--- /dev/null
+++ b/drivers/cpufreq/arm-bl-cpufreq.c
@@ -0,0 +1,270 @@
+/*
+ * arm-bl-cpufreq.c: Simple cpufreq backend for the ARM big.LITTLE switcher
+ * Copyright (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define MODULE_NAME "arm-bl-cpufreq"
+#define __module_pr_fmt(prefix, fmt) MODULE_NAME ": " prefix fmt
+#define pr_fmt(fmt) __module_pr_fmt("", fmt)
+
+#include <linux/bug.h>
+#include <linux/cache.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/string.h>
+
+#include <asm/bL_switcher.h>
+
+#include "arm-bl-cpufreq.h"
+
+/*
+ * Include the test prototypes and headers.
+ * This file is included a second time below, with ARM_BL_CPUFREQ_DEFINE_TESTS
+ * defined, to pull in the test function bodies.
+ */
+#include "arm-bl-cpufreq_tests.c"
+
+#define ARM_BL_CPUFREQ_DEFINE_TESTS
+#include "arm-bl-cpufreq_tests.c"
+
+/* Dummy frequencies representing the big and little clusters: */
+#define FREQ_BIG 1000000
+#define FREQ_LITTLE 100000
+
+/* Cluster numbers */
+#define CLUSTER_BIG 0
+#define CLUSTER_LITTLE 1
+
+/* Miscellaneous helpers */
+
+static unsigned int entry_to_freq(
+ struct cpufreq_frequency_table const *entry)
+{
+ return entry->frequency;
+}
+
+static unsigned int entry_to_cluster(
+ struct cpufreq_frequency_table const *entry)
+{
+ return entry->index;
+}
+
+static struct cpufreq_frequency_table const *find_entry_by_cluster(int cluster)
+{
+ unsigned int i;
+
+	for (i = 0; entry_to_freq(&bl_freqs[i]) != CPUFREQ_TABLE_END; i++)
+		if (entry_to_cluster(&bl_freqs[i]) == cluster)
+ return &bl_freqs[i];
+
+ WARN(1, pr_fmt("%s(): invalid cluster number %d, assuming 0\n"),
+ __func__, cluster);
+ return &bl_freqs[0];
+}
+
+static unsigned int cluster_to_freq(int cluster)
+{
+ return entry_to_freq(find_entry_by_cluster(cluster));
+}
+
+/*
+ * Functions to get the current status.
+ *
+ * Beware that the cluster for another CPU may change unexpectedly.
+ */
+
+static unsigned int get_local_cluster(void)
+{
+ unsigned int mpidr;
+ asm ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (mpidr));
+ return MPIDR_AFFINITY_LEVEL(mpidr, 1);
+}
+
+static void __get_current_cluster(void *_data)
+{
+ unsigned int *_cluster = _data;
+ *_cluster = get_local_cluster();
+}
+
+static int get_current_cluster(unsigned int cpu)
+{
+ unsigned int cluster = 0;
+ smp_call_function_single(cpu, __get_current_cluster, &cluster, 1);
+ return cluster;
+}
+
+static int get_current_cached_cluster(unsigned int cpu)
+{
+ return per_cpu(cpu_cur_cluster, cpu);
+}
+
+static unsigned int get_current_freq(unsigned int cpu)
+{
+ return cluster_to_freq(get_current_cluster(cpu));
+}
+
+/*
+ * Switch to the requested cluster.
+ */
+static void switch_to_entry(unsigned int cpu,
+ struct cpufreq_frequency_table const *target)
+{
+ int old_cluster, new_cluster;
+ struct cpufreq_freqs freqs;
+
+ old_cluster = get_current_cached_cluster(cpu);
+ new_cluster = entry_to_cluster(target);
+
+ pr_debug("Switching to cluster %d on CPU %d\n", new_cluster, cpu);
+
+	if (new_cluster == old_cluster)
+ return;
+
+ freqs.cpu = cpu;
+ freqs.old = cluster_to_freq(old_cluster);
+ freqs.new = entry_to_freq(target);
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ bL_switch_request(cpu, new_cluster);
+ per_cpu(cpu_cur_cluster, cpu) = new_cluster;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+/* Cpufreq methods and module code */
+
+static int bl_cpufreq_init(struct cpufreq_policy *policy)
+{
+ unsigned int cluster, cpu = policy->cpu;
+ int err;
+
+ /*
+ * Set CPU and policy min and max frequencies based on bl_freqs:
+ */
+ err = cpufreq_frequency_table_cpuinfo(policy, bl_freqs);
+ if (err)
+ goto error;
+ /*
+ * Publish frequency table so that it is available to governors
+ * and sysfs:
+ */
+ cpufreq_frequency_table_get_attr(bl_freqs, policy->cpu);
+
+ cluster = get_current_cluster(cpu);
+ per_cpu(cpu_cur_cluster, cpu) = cluster;
+
+ /*
+ * Ideally, transition_latency should be calibrated here.
+ */
+ policy->cpuinfo.transition_latency = BL_CPUFREQ_FAKE_LATENCY;
+ policy->cur = cluster_to_freq(cluster);
+ policy->shared_type = CPUFREQ_SHARED_TYPE_NONE;
+
+ pr_info("cpufreq initialised successfully\n");
+ return 0;
+
+error:
+ pr_warning("cpufreq initialisation failed (%d)\n", err);
+ return err;
+}
+
+static int bl_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, bl_freqs);
+}
+
+static int bl_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ int err;
+ int index;
+
+ err = cpufreq_frequency_table_target(policy, bl_freqs, target_freq,
+ relation, &index);
+	if (err)
+ return err;
+
+ switch_to_entry(policy->cpu, &bl_freqs[index]);
+ return 0;
+}
+
+static unsigned int bl_cpufreq_get(unsigned int cpu)
+{
+ return get_current_freq(cpu);
+}
+
+static struct freq_attr *bl_cpufreq_attrs[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL
+};
+
+static struct cpufreq_driver __read_mostly bl_cpufreq_driver = {
+ .owner = THIS_MODULE,
+ .name = MODULE_NAME,
+
+ .init = bl_cpufreq_init,
+ .verify = bl_cpufreq_verify,
+ .target = bl_cpufreq_target,
+ .get = bl_cpufreq_get,
+ .attr = bl_cpufreq_attrs,
+ /* what else? */
+};
+
+static int __init bl_cpufreq_module_init(void)
+{
+ int err;
+
+ /* test_config :
+ * - 0: Do not run tests
+ * - 1: Run tests and then register cpufreq driver if tests passed
+ */
+ if ((test_config > 0) && (pre_init_tests() != 0))
+ return -EINVAL;
+
+ err = cpufreq_register_driver(&bl_cpufreq_driver);
+	if (err)
+ pr_info("cpufreq backend driver registration failed (%d)\n",
+ err);
+ else {
+ pr_info("cpufreq backend driver registered.\n");
+
+ if ((test_config > 0) && (post_init_tests() != 0)) {
+ cpufreq_unregister_driver(&bl_cpufreq_driver);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+module_init(bl_cpufreq_module_init);
+
+static void __exit bl_cpufreq_module_exit(void)
+{
+ cpufreq_unregister_driver(&bl_cpufreq_driver);
+ pr_info("cpufreq backend driver unloaded.\n");
+}
+module_exit(bl_cpufreq_module_exit);
+
+
+MODULE_AUTHOR("Dave Martin");
+MODULE_DESCRIPTION("Simple cpufreq interface for the ARM big.LITTLE switcher");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/arm-bl-cpufreq.h b/drivers/cpufreq/arm-bl-cpufreq.h
new file mode 100644
index 00000000000..b13bb8c543d
--- /dev/null
+++ b/drivers/cpufreq/arm-bl-cpufreq.h
@@ -0,0 +1,37 @@
+#ifndef ARM_BL_CPUFREQ_H
+#define ARM_BL_CPUFREQ_H
+
+/* Dummy frequencies representing the big and little clusters: */
+#define FREQ_BIG 1000000
+#define FREQ_LITTLE 100000
+
+/* Cluster numbers */
+#define CLUSTER_BIG 0
+#define CLUSTER_LITTLE 1
+
+/*
+ * Switch latency advertised to cpufreq. This value is bogus and will
+ * need to be properly calibrated when running on real hardware.
+ */
+#define BL_CPUFREQ_FAKE_LATENCY 1
+
+static struct cpufreq_frequency_table __read_mostly bl_freqs[] = {
+ { CLUSTER_BIG, FREQ_BIG },
+ { CLUSTER_LITTLE, FREQ_LITTLE },
+ { 0, CPUFREQ_TABLE_END },
+};
+
+/* Cached current cluster for each CPU to save on IPIs */
+static DEFINE_PER_CPU(unsigned int, cpu_cur_cluster);
+
+static unsigned int entry_to_freq(struct cpufreq_frequency_table const *entry);
+static unsigned int entry_to_cluster(
+ struct cpufreq_frequency_table const *entry);
+static struct cpufreq_frequency_table const *find_entry_by_cluster(int cluster);
+static unsigned int cluster_to_freq(int cluster);
+static int get_current_cluster(unsigned int cpu);
+static int get_current_cached_cluster(unsigned int cpu);
+static unsigned int get_current_freq(unsigned int cpu);
+static unsigned int bl_cpufreq_get(unsigned int cpu);
+
+#endif /* ! ARM_BL_CPUFREQ_H */
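
Note: the bl_freqs table above is the whole of the driver's model: each cluster is one dummy operating point, and cluster_to_freq()/find_entry_by_cluster() are plain table walks. A standalone sketch of that lookup is shown below; struct entry, TABLE_END and the values are stand-ins in ordinary userspace C, not the cpufreq types.

#include <stdio.h>

#define FREQ_BIG	1000000
#define FREQ_LITTLE	100000
#define CLUSTER_BIG	0
#define CLUSTER_LITTLE	1
#define TABLE_END	~0u

struct entry { unsigned int cluster; unsigned int frequency; };

static const struct entry bl_freqs[] = {
	{ CLUSTER_BIG,    FREQ_BIG },
	{ CLUSTER_LITTLE, FREQ_LITTLE },
	{ 0,              TABLE_END },
};

/* Map a cluster number to its dummy frequency, defaulting to entry 0. */
static unsigned int cluster_to_freq(unsigned int cluster)
{
	unsigned int i;

	for (i = 0; bl_freqs[i].frequency != TABLE_END; i++)
		if (bl_freqs[i].cluster == cluster)
			return bl_freqs[i].frequency;
	return bl_freqs[0].frequency;
}

int main(void)
{
	printf("big    -> %u\n", cluster_to_freq(CLUSTER_BIG));
	printf("little -> %u\n", cluster_to_freq(CLUSTER_LITTLE));
	return 0;
}
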
diff --git a/drivers/cpufreq/arm-bl-cpufreq_tests.c b/drivers/cpufreq/arm-bl-cpufreq_tests.c
new file mode 100644
index 00000000000..da349e165f4
--- /dev/null
+++ b/drivers/cpufreq/arm-bl-cpufreq_tests.c
@@ -0,0 +1,652 @@
+/*
+ * arm-bl-cpufreq_tests.c: Unit tests on the simple cpufreq backend for the
+ * ARM big.LITTLE switcher
+ * Copyright (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef ARM_BL_CPUFREQ_DEFINE_TESTS
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include "arm-bl-cpufreq.h"
+
+static short int test_config;
+
+static int pre_init_tests(void);
+static int post_init_tests(void);
+
+#else /* ! ARM_BL_CPUFREQ_DEFINE_TESTS */
+
+#ifdef CONFIG_ARM_BL_CPUFREQ_TEST
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) __module_pr_fmt("[test] ", fmt)
+
+#define SWITCH_DELAY 10
+#define SWITCH_TRANSITION_DELAY 200
+#define POST_INIT_TESTS_DELAY 100
+
+static DECLARE_WAIT_QUEUE_HEAD(test_wq);
+static int test_transition_count;
+unsigned int test_transition_freq;
+
+module_param(test_config, short, 1);
+MODULE_PARM_DESC(test_config, "Run tests before registering the cpufreq driver (0: no tests, 1: run tests, then register the driver if they pass (default))");
+
+static struct cpufreq_frequency_table const *get_other_entry(
+ struct cpufreq_frequency_table const *entry)
+{
+ if (entry_to_cluster(entry) == CLUSTER_BIG)
+ return find_entry_by_cluster(CLUSTER_LITTLE);
+ else
+ return find_entry_by_cluster(CLUSTER_BIG);
+}
+
+static int test_cpufreq_frequency_table(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ struct cpufreq_frequency_table const *entry;
+
+	/* Get the big and little cpufreq_frequency_table entries and check
+	 * that entry_to_freq() and entry_to_cluster() return the corresponding
+	 * frequency and cluster id.
+ */
+ entry = find_entry_by_cluster(CLUSTER_BIG);
+
+ ++nTest;
+ if (entry_to_freq(entry) != FREQ_BIG) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/frequency_table/%d:entry_to_freq(big) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ ++nTest;
+ if (entry_to_cluster(entry) != CLUSTER_BIG) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/frequency_table/%d:entry_to_cluster(big) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ entry = find_entry_by_cluster(CLUSTER_LITTLE);
+
+ ++nTest;
+ if (entry_to_freq(entry) != FREQ_LITTLE) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/frequency_table/%d:entry_to_freq(little) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ ++nTest;
+ if (entry_to_cluster(entry) != CLUSTER_LITTLE) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/frequency_table/%d:entry_to_cluster(little) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ pr_info("name=pre-init/frequency_table run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_cluster_to_freq(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+
+	/* Check if cluster_to_freq() result is consistent, ie :
+ * - CLUSTER_BIG => FREQ_BIG
+ * - CLUSTER_LITTLE => FREQ_LITTLE
+ */
+ ++nTest;
+ if (cluster_to_freq(CLUSTER_BIG) != FREQ_BIG) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/cluster_to_freq/%d:cluster_to_freq(big) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ ++nTest;
+ if (cluster_to_freq(CLUSTER_LITTLE) != FREQ_LITTLE) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/cluster_to_freq/%d:cluster_to_freq(little) result=%s\n",
+ nTest, (testResult ? "PASS" : "FAIL"));
+
+ pr_info("name=pre-init/cluster_to_freq run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_get_current_cluster(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cluster, cpu;
+
+	/* Check if get_current_cluster() returns a consistent value, ie
+ * CLUSTER_BIG or CLUSTER_LITTLE
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ cluster = get_current_cluster(cpu);
+ ++nTest;
+ if ((cluster != CLUSTER_BIG) && (cluster != CLUSTER_LITTLE)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=pre-init/get_current_cluster/%d:get_current_cluster(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+ }
+
+ pr_info("name=pre-init/get_current_cluster run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_bl_cpufreq_get(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cpu;
+ struct cpufreq_frequency_table const *other_entry = NULL;
+ struct cpufreq_frequency_table const *origin_entry = NULL;
+ struct cpufreq_policy *policy = NULL;
+
+ /*
+	 * Check the bl_cpufreq_get() return value: for all cores it has to be
+	 * the frequency of origin_entry
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ policy = cpufreq_cpu_get(cpu);
+ origin_entry = find_entry_by_cluster(get_current_cluster(cpu));
+ other_entry = get_other_entry(origin_entry);
+
+ ++nTest;
+ if (bl_cpufreq_get(cpu) != entry_to_freq(origin_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/bl_cpufreq_get/%d:origin(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch to "other" cluster, ie cluster not used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(other_entry),
+ CPUFREQ_RELATION_H);
+
+ ++nTest;
+ if (bl_cpufreq_get(cpu) != entry_to_freq(other_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/bl_cpufreq_get/%d:other(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch back to "origin" cluster, ie cluster used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(origin_entry),
+ CPUFREQ_RELATION_H);
+ cpufreq_cpu_put(policy);
+ }
+
+ pr_info("name=post-init/bl_cpufreq_get run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_get_current_freq(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cpu;
+ struct cpufreq_frequency_table const *other_entry = NULL;
+ struct cpufreq_frequency_table const *origin_entry = NULL;
+ struct cpufreq_policy *policy = NULL;
+
+ /*
+	 * Check if get_current_freq() returns a consistent value, ie
+ * FREQ_BIG while on big cluster and FREQ_LITTLE on little cluster
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ policy = cpufreq_cpu_get(cpu);
+ origin_entry = find_entry_by_cluster(get_current_cluster(cpu));
+ other_entry = get_other_entry(origin_entry);
+
+ ++nTest;
+ if (get_current_freq(cpu) != entry_to_freq(origin_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/get_current_freq/%d:origin(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch to "other" cluster, ie cluster not used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(other_entry),
+ CPUFREQ_RELATION_H);
+
+ ++nTest;
+ if (get_current_freq(cpu) != entry_to_freq(other_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/get_current_freq/%d:other(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch back to "origin" cluster, ie cluster used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(origin_entry),
+ CPUFREQ_RELATION_H);
+ cpufreq_cpu_put(policy);
+ }
+
+ pr_info("name=post-init/get_current_freq run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_get_current_cached_cluster(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cpu, cluster;
+ struct cpufreq_frequency_table const *other_entry = NULL;
+ struct cpufreq_frequency_table const *origin_entry = NULL;
+ struct cpufreq_policy *policy = NULL;
+
+ /*
+	 * Check if get_current_cached_cluster() returns a consistent value, ie
+ * CLUSTER_BIG while on big cluster and CLUSTER_LITTLE on little cluster
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ policy = cpufreq_cpu_get(cpu);
+ origin_entry = find_entry_by_cluster(get_current_cluster(cpu));
+ other_entry = get_other_entry(origin_entry);
+
+ ++nTest;
+ cluster = get_current_cached_cluster(cpu);
+ if (cluster != entry_to_cluster(origin_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/get_current_cached_cluster/%d:origin(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch to "other" cluster, ie cluster not used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(other_entry),
+ CPUFREQ_RELATION_H);
+
+ ++nTest;
+ cluster = get_current_cached_cluster(cpu);
+ if (cluster != entry_to_cluster(other_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/get_current_cached_cluster/%d:other(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /*
+ * Switch back to "origin" cluster, ie cluster used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(origin_entry),
+ CPUFREQ_RELATION_H);
+ cpufreq_cpu_put(policy);
+ }
+
+ pr_info("name=post-init/get_current_cached_cluster run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_cpufreq_driver_target(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cpu;
+ struct cpufreq_frequency_table const *other_entry = NULL;
+ struct cpufreq_frequency_table const *origin_entry = NULL;
+ struct cpufreq_policy *policy = NULL;
+
+ /*
+	 * Try to switch between clusters and check that each switch was
+	 * performed successfully
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ policy = cpufreq_cpu_get(cpu);
+ origin_entry = find_entry_by_cluster(get_current_cluster(cpu));
+ other_entry = get_other_entry(origin_entry);
+
+ /* Switch to "other" cluster, ie cluster not used at module
+ * loading time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(other_entry),
+ CPUFREQ_RELATION_H);
+
+ /*
+ * Give the hardware some time to switch between clusters
+ */
+ mdelay(SWITCH_DELAY);
+
+ ++nTest;
+ if (get_current_cluster(cpu) != entry_to_cluster(other_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/cpufreq_driver_target/%d:other(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /* Switch again to "other" cluster
+ */
+ cpufreq_driver_target(policy, entry_to_freq(other_entry),
+ CPUFREQ_RELATION_H);
+ /*
+ * Give the hardware some time to switch between clusters
+ */
+ mdelay(SWITCH_DELAY);
+
+ ++nTest;
+ if (get_current_cluster(cpu) != entry_to_cluster(other_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/cpufreq_driver_target/%d:otherAgain(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /* Switch back to "origin" cluster, ie cluster used at module loading
+ * time
+ */
+ cpufreq_driver_target(policy, entry_to_freq(origin_entry),
+ CPUFREQ_RELATION_H);
+ /*
+ * Give the hardware some time to switch between clusters
+ */
+ mdelay(SWITCH_DELAY);
+
+ ++nTest;
+		if (get_current_cluster(cpu) != entry_to_cluster(origin_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/cpufreq_driver_target/%d:origin(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ /* Switch again to "origin" cluster
+ */
+ cpufreq_driver_target(policy, entry_to_freq(origin_entry),
+ CPUFREQ_RELATION_H);
+ /*
+ * Give the hardware some time to switch between clusters
+ */
+ mdelay(SWITCH_DELAY);
+
+ ++nTest;
+		if (get_current_cluster(cpu) != entry_to_cluster(origin_entry)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/cpufreq_driver_target/%d:originAgain(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ cpufreq_cpu_put(policy);
+ }
+
+ pr_info("name=post-init/cpufreq_driver_target run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+/* Check that the new frequency is the expected one, increment the transition
+ * count and wake up the test function.
+ */
+static int test_arm_bl_cpufreq_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (freq->new != test_transition_freq)
+ test_transition_freq = -1;
+
+ ++test_transition_count;
+
+ wake_up(&test_wq);
+
+ return 0;
+}
+static struct notifier_block test_arm_bl_cpufreq_notifier_block = {
+ .notifier_call = test_arm_bl_cpufreq_notifier
+};
+
+static int test_transitions(void)
+{
+ int nTest = 0, failCount = 0, testResult = 0;
+ unsigned int cpu, origin_freq, other_freq;
+ struct cpufreq_frequency_table const *other_entry = NULL;
+ struct cpufreq_frequency_table const *origin_entry = NULL;
+ struct cpufreq_policy *policy = NULL;
+
+ /*
+ * register test_arm_bl_cpufreq_notifier_block as notifier :
+ * test_arm_bl_cpufreq_notifier_block will be called on cluster
+	 * change and increment test_transition_count
+ */
+ cpufreq_register_notifier(&test_arm_bl_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+ /*
+	 * Switch between clusters and check that notifications are received
+ */
+ for_each_cpu(cpu, cpu_present_mask) {
+ policy = cpufreq_cpu_get(cpu);
+ origin_entry = find_entry_by_cluster(get_current_cluster(cpu));
+ other_entry = get_other_entry(origin_entry);
+ origin_freq = entry_to_freq(origin_entry);
+ other_freq = entry_to_freq(other_entry);
+
+		/* Switch to the "other" cluster and check the notification
+ */
+ ++nTest;
+ test_transition_count = 0;
+ test_transition_freq = other_freq;
+ cpufreq_driver_target(policy, other_freq, CPUFREQ_RELATION_H);
+ wait_event_timeout(test_wq, (test_transition_count == 2),
+ msecs_to_jiffies(SWITCH_TRANSITION_DELAY));
+
+ if ((test_transition_count != 2)
+ || (test_transition_freq != other_freq)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/transitions/%d:other(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+		/* Switch back to the "origin" cluster and check the notification
+ */
+ ++nTest;
+ test_transition_count = 0;
+ test_transition_freq = origin_freq;
+ cpufreq_driver_target(policy, origin_freq, CPUFREQ_RELATION_H);
+ wait_event_timeout(test_wq, (test_transition_count == 2),
+ msecs_to_jiffies(SWITCH_TRANSITION_DELAY));
+
+ if ((test_transition_count != 2)
+ || (test_transition_freq != origin_freq)) {
+ testResult = 0;
+ ++failCount;
+ } else
+ testResult = 1;
+ pr_info("name=post-init/transitions/%d:origin(%u) result=%s\n",
+ nTest, cpu, (testResult ? "PASS" : "FAIL"));
+
+ cpufreq_cpu_put(policy);
+ }
+
+ cpufreq_unregister_notifier(&test_arm_bl_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+ pr_info("name=post-init/transitions run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int pre_init_tests(void)
+{
+ int nTest = 0, failCount = 0;
+
+	pr_info("Begin pre-init tests\n");
+
+ ++nTest;
+ if (test_cpufreq_frequency_table() < 0)
+ ++failCount;
+
+ ++nTest;
+ if (test_cluster_to_freq() < 0)
+ ++failCount;
+
+ ++nTest;
+ if (test_get_current_cluster() < 0)
+ ++failCount;
+
+ pr_info("name=pre-init run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+static int post_init_tests(void)
+{
+ /*
+ * Run all post-init tests
+ *
+	 * We wait POST_INIT_TESTS_DELAY ms between tests to be sure the system is
+ * in a stable state before running a new test.
+ */
+ int nTest = 0, failCount = 0;
+
+ mdelay(POST_INIT_TESTS_DELAY);
+ ++nTest;
+ if (test_cpufreq_driver_target() < 0)
+ ++failCount;
+
+ mdelay(POST_INIT_TESTS_DELAY);
+ ++nTest;
+ if (test_transitions() < 0)
+ ++failCount;
+
+ mdelay(POST_INIT_TESTS_DELAY);
+ ++nTest;
+ if (test_get_current_freq() < 0)
+ ++failCount;
+
+ mdelay(POST_INIT_TESTS_DELAY);
+ ++nTest;
+ if (test_bl_cpufreq_get() < 0)
+ ++failCount;
+
+ mdelay(POST_INIT_TESTS_DELAY);
+ ++nTest;
+ if (test_get_current_cached_cluster() < 0)
+ ++failCount;
+
+ pr_info("name=post-init run=%d result=%s pass=%d fail=%d\n",
+ nTest, (failCount == 0 ? "PASS" : "FAIL"),
+ (nTest - failCount), failCount);
+ if (failCount != 0)
+ return -1;
+
+ return 0;
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) __module_pr_fmt("", fmt)
+#else /* ! CONFIG_ARM_BL_CPUFREQ_TEST */
+
+static int pre_init_tests(void) { return 0; }
+static int post_init_tests(void) { return 0; }
+
+#endif /* CONFIG_ARM_BL_CPUFREQ_TEST */
+#endif /* ARM_BL_CPUFREQ_DEFINE_TESTS */
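
Note: the tests file above is included into arm-bl-cpufreq.c twice: the first pass (ARM_BL_CPUFREQ_DEFINE_TESTS undefined) only declares pre_init_tests()/post_init_tests(), and the second pass supplies either the real bodies or empty stubs depending on CONFIG_ARM_BL_CPUFREQ_TEST. A compressed single-file illustration of the same two-pass idiom follows; WANT_REAL_TESTS and the body shown are hypothetical, not the driver's code.

#include <stdio.h>

/* Pass 1: as if the fragment were included without the "define bodies"
 * macro, yielding prototypes only. */
static int pre_init_tests(void);

/* Pass 2: as if the fragment were included again with the macro set,
 * yielding either the real bodies or trivial stubs. */
#define WANT_REAL_TESTS 1
#if WANT_REAL_TESTS
static int pre_init_tests(void)
{
	puts("name=pre-init result=PASS");
	return 0;
}
#else
static int pre_init_tests(void) { return 0; }
#endif

int main(void)
{
	return pre_init_tests();
}
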
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index b5601fcd79e..7858957b9e6 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -23,18 +23,43 @@
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
+#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <asm/bL_switcher.h>
#include <asm/topology.h>
#include "arm_big_little.h"
+#ifdef CONFIG_BL_SWITCHER
+static bool bL_switching_enabled;
+#define is_bL_switching_enabled() bL_switching_enabled
+#define set_switching_enabled(x) (bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled() false
+#define set_switching_enabled(x) do { } while (0)
+#endif
+
+#define A15_CLUSTER 0
+#define A7_CLUSTER 1
#define MAX_CLUSTERS 2
+#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
+
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS];
-static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static int freq_table_cnt[MAX_CLUSTERS];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+
+static unsigned int clk_big_min;	/* Minimum clock frequency (Big) */
+static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
/*
* Functions to get the current status.
@@ -43,14 +68,113 @@ static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
*/
static int cpu_to_cluster(int cpu)
{
- return topology_physical_package_id(cpu);
+	return is_bL_switching_enabled() ? MAX_CLUSTERS :
+ topology_physical_package_id(cpu);
}
-static unsigned int bL_cpufreq_get(unsigned int cpu)
+static unsigned int find_cluster_maxfreq(int cluster)
{
- u32 cur_cluster = cpu_to_cluster(cpu);
+ int j;
+ u32 max_freq = 0, cpu_freq;
+
+ for_each_online_cpu(j) {
+ cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+ if ((cluster == per_cpu(physical_cluster, j)) &&
+ (max_freq < cpu_freq))
+ max_freq = cpu_freq;
+ }
+
+ pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
+ max_freq);
+
+ return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
+{
+ u32 cur_cluster = per_cpu(physical_cluster, cpu);
+ u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+ /* For switcher we use virtual A15 clock rates */
+ if (is_bL_switching_enabled())
+ rate = VIRT_FREQ(cur_cluster, rate);
+
+ pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
+ cur_cluster, rate);
+
+ return rate;
+}
+
+static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
+{
+ pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, cpu));
+
+ return per_cpu(cpu_last_req_freq, cpu);
+}
+
+static unsigned int
+bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+ u32 new_rate, prev_rate;
+ int ret;
+
+ mutex_lock(&cluster_lock[new_cluster]);
+
+ prev_rate = per_cpu(cpu_last_req_freq, cpu);
+ per_cpu(cpu_last_req_freq, cpu) = rate;
+ per_cpu(physical_cluster, cpu) = new_cluster;
+
+ if (is_bL_switching_enabled()) {
+ new_rate = find_cluster_maxfreq(new_cluster);
+ new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+ } else {
+ new_rate = rate;
+ }
+
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+ __func__, cpu, old_cluster, new_cluster, new_rate);
- return clk_get_rate(clk[cur_cluster]) / 1000;
+ ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (WARN_ON(ret)) {
+ pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+ new_cluster);
+ per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+ per_cpu(physical_cluster, cpu) = old_cluster;
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ return ret;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ /* Recalc freq for old cluster when switching clusters */
+ if (old_cluster != new_cluster) {
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+ __func__, cpu, old_cluster, new_cluster);
+
+ /* Switch cluster */
+ bL_switch_request(cpu, new_cluster);
+
+ mutex_lock(&cluster_lock[old_cluster]);
+
+ /* Set freq of old cluster if there are cpus left on it */
+ new_rate = find_cluster_maxfreq(old_cluster);
+ new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+ if (new_rate) {
+ pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+ __func__, old_cluster, new_rate);
+
+ if (clk_set_rate(clk[old_cluster], new_rate * 1000))
+ pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+ __func__, ret, old_cluster);
+ }
+ mutex_unlock(&cluster_lock[old_cluster]);
+ }
+
+ return 0;
}
/* Validate policy frequency range */
@@ -67,21 +191,22 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
struct cpufreq_freqs freqs;
- u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+ u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster,
+ actual_cluster;
int ret = 0;
/* ASSUMPTION: The cpu can't be hotplugged in this function */
- cur_cluster = cpu_to_cluster(policy->cpu);
+ cur_cluster = cpu_to_cluster(cpu);
+ new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
- freqs.old = bL_cpufreq_get(policy->cpu);
+ freqs.cpu = cpu;
+ freqs.old = bL_cpufreq_get_rate(cpu);
/* Determine valid target frequency using freq_table */
cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
target_freq, relation, &freq_tab_idx);
freqs.new = freq_table[cur_cluster][freq_tab_idx].frequency;
- freqs.cpu = policy->cpu;
-
pr_debug("%s: cpu: %d, cluster: %d, oldfreq: %d, target freq: %d, new freq: %d\n",
__func__, cpu, cur_cluster, freqs.old, target_freq,
freqs.new);
@@ -89,14 +214,22 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
if (freqs.old == freqs.new)
return 0;
+ if (is_bL_switching_enabled()) {
+ if ((actual_cluster == A15_CLUSTER) &&
+ (freqs.new < clk_big_min)) {
+ new_cluster = A7_CLUSTER;
+ } else if ((actual_cluster == A7_CLUSTER) &&
+ (freqs.new > clk_little_max)) {
+ new_cluster = A15_CLUSTER;
+ }
+ }
+
for_each_cpu(freqs.cpu, policy->cpus)
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
- if (ret) {
- pr_err("clk_set_rate failed: %d\n", ret);
+ ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new);
+ if (ret)
return ret;
- }
policy->cur = freqs.new;
@@ -106,6 +239,28 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t min_freq = ~0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency < min_freq)
+ min_freq = table[i].frequency;
+ return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t max_freq = 0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency > max_freq)
+ max_freq = table[i].frequency;
+ return max_freq;
+}
+
/* translate the integer array into cpufreq_frequency_table entries */
struct cpufreq_frequency_table *
arm_bL_copy_table_from_array(unsigned int *table, int count)
@@ -141,7 +296,41 @@ void arm_bL_free_freq_table(u32 cluster)
}
EXPORT_SYMBOL_GPL(arm_bL_free_freq_table);
-static void put_cluster_clk_and_freq_table(u32 cluster)
+static int merge_cluster_tables(void)
+{
+ int i, j, k = 0, count = 1;
+ struct cpufreq_frequency_table *table;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ count += freq_table_cnt[i];
+
+ table = kzalloc(sizeof(*table) * count, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ freq_table[MAX_CLUSTERS] = table;
+
+ /* Add in reverse order to get freqs in increasing order */
+ for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
+ for (j = 0; j < freq_table_cnt[i]; j++) {
+ table[k].index = k;
+ table[k].frequency = VIRT_FREQ(i,
+ freq_table[i][j].frequency);
+ pr_debug("%s: index: %d, freq: %d\n", __func__, k,
+ table[k].frequency);
+ k++;
+ }
+ }
+
+ table[k].index = k;
+ table[k].frequency = CPUFREQ_TABLE_END;
+
+ pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
+
+ return 0;
+}
+
+static void _put_cluster_clk_and_freq_table(u32 cluster)
{
if (!atomic_dec_return(&cluster_usage[cluster])) {
clk_put(clk[cluster]);
@@ -152,9 +341,26 @@ static void put_cluster_clk_and_freq_table(u32 cluster)
}
}
-static int get_cluster_clk_and_freq_table(u32 cluster)
+static void put_cluster_clk_and_freq_table(u32 cluster)
{
- char name[9] = "cluster";
+ int i;
+
+ if (cluster < MAX_CLUSTERS)
+ return _put_cluster_clk_and_freq_table(cluster);
+
+ if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS]))
+ return;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ _put_cluster_clk_and_freq_table(i);
+
+ /* free virtual table */
+ arm_bL_free_freq_table(MAX_CLUSTERS);
+}
+
+static int _get_cluster_clk_and_freq_table(u32 cluster)
+{
+ char name[9] = "clusterX";
int count;
if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -164,6 +370,8 @@ static int get_cluster_clk_and_freq_table(u32 cluster)
if (!freq_table[cluster])
goto atomic_dec;
+ freq_table_cnt[cluster] = count;
+
name[7] = cluster + '0';
clk[cluster] = clk_get(NULL, name);
if (!IS_ERR_OR_NULL(clk[cluster])) {
@@ -181,6 +389,48 @@ atomic_dec:
return -ENODATA;
}
+static int get_cluster_clk_and_freq_table(u32 cluster)
+{
+ int i, ret;
+
+ if (cluster < MAX_CLUSTERS)
+ return _get_cluster_clk_and_freq_table(cluster);
+
+ if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+ return 0;
+
+ /*
+ * Get data for all clusters and fill virtual cluster with a merge of
+ * both
+ */
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ ret = _get_cluster_clk_and_freq_table(i);
+ if (ret)
+ goto put_clusters;
+ }
+
+ ret = merge_cluster_tables();
+ if (ret)
+ goto put_clusters;
+
+	/* Assuming 2 clusters, set clk_big_min and clk_little_max */
+ clk_big_min = get_table_min(freq_table[0]);
+ clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+ pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+ __func__, cluster, clk_big_min, clk_little_max);
+
+ return 0;
+
+put_clusters:
+ while (i)
+ _put_cluster_clk_and_freq_table(--i);
+
+ atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+ return ret;
+}
+
/* Per-CPU initialization */
static int bL_cpufreq_init(struct cpufreq_policy *policy)
{
@@ -202,20 +452,22 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
- policy->cpuinfo.transition_latency = 1000000; /* 1 ms assumed */
- policy->cur = bL_cpufreq_get(policy->cpu);
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+ cpumask_copy(policy->related_cpus, policy->cpus);
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
- cpumask_copy(policy->related_cpus, policy->cpus);
+ per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+ } else {
+ /* Assumption: during init, we are always running on A15 */
+ per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+ }
- pr_info("CPU %d initialized\n", policy->cpu);
- return 0;
-}
+ policy->cpuinfo.transition_latency = 1000000; /* 1 ms assumed */
+ policy->cur = clk_get_cpu_rate(policy->cpu);
+ per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur;
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
- put_cluster_clk_and_freq_table(cpu_to_cluster(policy->cpu));
- pr_debug("%s: Exited, cpu: %d\n", __func__, policy->cpu);
+ pr_info("%s: Initialized, cpu: %d, cluster %d\n", __func__,
+ policy->cpu, cur_cluster);
return 0;
}
@@ -231,15 +483,46 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.flags = CPUFREQ_STICKY,
.verify = bL_cpufreq_verify_policy,
.target = bL_cpufreq_set_target,
- .get = bL_cpufreq_get,
+ .get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
- .exit = bL_cpufreq_exit,
.attr = bL_cpufreq_attr,
};
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ pr_debug("%s: action: %ld\n", __func__, action);
+
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ set_switching_enabled(true);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_DISABLE:
+ set_switching_enabled(false);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+ .notifier_call = bL_cpufreq_switcher_notifier,
+};
+
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
{
- int ret;
+ int ret, i;
if (arm_bL_ops) {
pr_debug("%s: Already registered: %s, exiting\n", __func__,
@@ -254,16 +537,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
arm_bL_ops = ops;
+ ret = bL_switcher_get_enabled();
+ set_switching_enabled(ret);
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ mutex_init(&cluster_lock[i]);
+
ret = cpufreq_register_driver(&bL_cpufreq_driver);
if (ret) {
pr_info("%s: Failed registering platform driver: %s, err: %d\n",
__func__, ops->name, ret);
arm_bL_ops = NULL;
} else {
- pr_info("%s: Registered platform driver: %s\n", __func__,
- ops->name);
+ ret = bL_switcher_register_notifier(&bL_switcher_notifier);
+ if (ret) {
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ arm_bL_ops = NULL;
+ } else {
+ pr_info("%s: Registered platform driver: %s\n",
+ __func__, ops->name);
+ }
}
+ bL_switcher_put_enabled();
return ret;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_register);
@@ -275,9 +571,23 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops)
__func__, arm_bL_ops->name);
}
+ bL_switcher_get_enabled();
+ bL_switcher_unregister_notifier(&bL_switcher_notifier);
cpufreq_unregister_driver(&bL_cpufreq_driver);
+ bL_switcher_put_enabled();
pr_info("%s: Un-registered platform driver: %s\n", __func__,
arm_bL_ops->name);
+
+ /* For saving table get/put on every cpu in/out */
+ if (is_bL_switching_enabled()) {
+ put_cluster_clk_and_freq_table(MAX_CLUSTERS);
+ } else {
+ int i;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ put_cluster_clk_and_freq_table(i);
+ }
+
arm_bL_ops = NULL;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
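
Note: when switching is enabled, arm_big_little.c exposes a single virtual cluster whose table merges both physical tables, with A7 rates halved (VIRT_FREQ) so they sit on the same scale as A15 rates; ACTUAL_FREQ undoes the scaling before the clock is programmed. The sketch below mirrors that scaling and the big/little decision from bL_cpufreq_set_target(); the frequency values are made up for illustration.

#include <stdio.h>

#define A15_CLUSTER 0
#define A7_CLUSTER  1

/* A7 rates are halved to get a "virtual A15-equivalent" rate and doubled
 * again before being handed to the clock framework. */
#define ACTUAL_FREQ(cluster, freq) ((cluster) == A7_CLUSTER ? (freq) << 1 : (freq))
#define VIRT_FREQ(cluster, freq)   ((cluster) == A7_CLUSTER ? (freq) >> 1 : (freq))

int main(void)
{
	unsigned int clk_big_min = 500000;	/* hypothetical, in kHz */
	unsigned int clk_little_max = 600000;	/* hypothetical virtual rate */
	unsigned int cluster = A15_CLUSTER;
	unsigned int target = 450000;		/* requested virtual rate */

	/* Same decision as in bL_cpufreq_set_target(): drop to the A7 cluster
	 * when the request falls below the smallest big-cluster rate. */
	if (cluster == A15_CLUSTER && target < clk_big_min)
		cluster = A7_CLUSTER;
	else if (cluster == A7_CLUSTER && target > clk_little_max)
		cluster = A15_CLUSTER;

	printf("cluster=%u clk rate=%u kHz\n",
	       cluster, ACTUAL_FREQ(cluster, target));
	return 0;
}
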
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index bfd6273fd87..1db214b2660 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -21,6 +21,7 @@
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <asm/cputime.h>
+#include <asm/bL_switcher.h>
static spinlock_t cpufreq_stats_lock;
@@ -378,7 +379,7 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
-static int __init cpufreq_stats_init(void)
+static int cpufreq_stats_setup(void)
{
int ret;
unsigned int cpu;
@@ -406,7 +407,8 @@ static int __init cpufreq_stats_init(void)
return 0;
}
-static void __exit cpufreq_stats_exit(void)
+
+static void cpufreq_stats_cleanup(void)
{
unsigned int cpu;
@@ -421,6 +423,49 @@ static void __exit cpufreq_stats_exit(void)
}
}
+static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_stats_cleanup();
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ case BL_NOTIFY_POST_DISABLE:
+ cpufreq_stats_setup();
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block switcher_notifier = {
+ .notifier_call = cpufreq_stats_switcher_notifier,
+};
+
+static int __init cpufreq_stats_init(void)
+{
+ int ret;
+ spin_lock_init(&cpufreq_stats_lock);
+
+ ret = cpufreq_stats_setup();
+ if (!ret)
+ bL_switcher_register_notifier(&switcher_notifier);
+
+ return ret;
+}
+
+static void __exit cpufreq_stats_exit(void)
+{
+ bL_switcher_unregister_notifier(&switcher_notifier);
+ cpufreq_stats_cleanup();
+}
+
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
diff --git a/linaro/configs/big-LITTLE-IKS.conf b/linaro/configs/big-LITTLE-IKS.conf
new file mode 100644
index 00000000000..b067fde86ea
--- /dev/null
+++ b/linaro/configs/big-LITTLE-IKS.conf
@@ -0,0 +1,5 @@
+CONFIG_BIG_LITTLE=y
+CONFIG_BL_SWITCHER=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y