diff options
author | Mark Brown <broonie@sirena.org.uk> | 2013-06-13 16:36:54 +0100 |
---|---|---|
committer | Mark Brown <broonie@sirena.org.uk> | 2013-06-13 16:36:54 +0100 |
commit | 7e1a54fe7dd79d8ccc86397c19e8bfdab21f0ca6 (patch) | |
tree | 27229267b3816cb00660fa5a7c57604de8c2a3e5 | |
parent | 44665078f9e4261a98e0d37303d8a0d4aaebaf3d (diff) | |
parent | 2be12549c900303c418fcbb7044979e190fd4e7e (diff) |
Automatically merging tracking-iks into merge-manifest
Conflicting files:
45 files changed, 1866 insertions, 433 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b67e45a9830..49edb6bea6b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1706,6 +1706,24 @@ config BIG_LITTLE help This option enables support for the big.LITTLE architecture. +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G @@ -1793,8 +1811,9 @@ config SCHED_HRTICK def_bool HIGH_RES_TIMERS config THUMB2_KERNEL - bool "Compile the kernel in Thumb-2 mode" + bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on CPU_V7 && !CPU_V6 && !CPU_V6K + default y if CPU_THUMBONLY select AEABI select ARM_ASM_UNIFIED select ARM_UNWIND diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index bd48ab52544..d412ff0b15a 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -13,3 +13,7 @@ obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg +AFLAGS_mcpm_head.o := -march=armv7-a +AFLAGS_vlock.o := -march=armv7-a +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 00000000000..1506bf536d1 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,732 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id)); + return id; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + ob_cluster = clusterid; + ib_cluster = clusterid ^ 1; + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + ob_cluster = clusterid; + ib_cluster = clusterid ^ 1; + + if (new_cluster_id == clusterid) + return 0; + + pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(cpuid, ob_cluster, NULL); + mcpm_set_entry_vector(cpuid, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[cpuid][ob_cluster]); + mcpm_set_early_poke(cpuid, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(cpuid, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[cpuid][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(cpuid, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't sleep/schedule anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), mpidr & MPIDR_HWID_BITMASK); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[cpuid][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* + * Flip the cluster in the CPU logical map for this CPU. + * This must be flushed to RAM as the resume code + * needs to access it while the caches are still disabled. + */ + cpu_logical_map(this_cpu) ^= (1 << 8); + __cpuc_flush_dcache_area(&cpu_logical_map(this_cpu), + sizeof(cpu_logical_map(this_cpu))); + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid); + BUG_ON(clusterid != ib_cluster); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), mpidr & MPIDR_HWID_BITMASK); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; +}; + +static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + cluster = xchg(&t->wanted_cluster, -1); + if (cluster != -1) + bL_switch_to(cluster); + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct * bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request - Switch to a specific cluster for the given CPU + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + */ +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + struct bL_thread *t; + + if (cpu >= MAX_CPUS_PER_CLUSTER) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + t->wanted_cluster = new_cluster_id; + wake_up(&t->wq); + return 0; +} + +EXPORT_SYMBOL_GPL(bL_switch_request); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int bL_switcher_halve_cpus(void) +{ + int cpu, cluster, i, ret; + cpumask_t cluster_mask[2], common_mask; + + cpumask_clear(&bL_switcher_removed_logical_cpus); + cpumask_clear(&cluster_mask[0]); + cpumask_clear(&cluster_mask[1]); + + for_each_online_cpu(i) { + cpu = cpu_logical_map(i) & 0xff; + cluster = (cpu_logical_map(i) >> 8) & 0xff; + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + cpumask_set_cpu(cpu, &cluster_mask[cluster]); + } + + if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) { + pr_err("%s: no common set of CPUs\n", __func__); + return -EINVAL; + } + + for_each_online_cpu(i) { + cpu = cpu_logical_map(i) & 0xff; + cluster = (cpu_logical_map(i) >> 8) & 0xff; + + if (cpumask_test_cpu(cpu, &common_mask)) { + /* Let's take note of the GIC ID for this CPU */ + int gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + /* + * We keep only those logical CPUs which number + * is equal to their physical CPU number. This is + * not perfect but good enough in most cases. + */ + if (cpu == i) { + bL_switcher_cpu_original_cluster[cpu] = cluster; + continue; + } + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr() & MPIDR_HWID_BITMASK); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + if (bL_switcher_active) { + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warning("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster, i; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff)); + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (IS_ERR_OR_NULL(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = (cpu_logical_map(cpu) >> 8) & 0xff; + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = (cpu_logical_map(cpu) >> 8) & 0xff; + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + for_each_cpu(i, &bL_switcher_removed_logical_cpus) { + if ((cpu_logical_map(i) & 0xff) != cpu) + continue; + pr_crit("%s: CPU %d can't be restored\n", + __func__, i); + cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus); + break; + } + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation while the switcher is active. + * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (bL_switcher_active) + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static struct notifier_block bL_switcher_hotplug_notifier = + { &bL_switcher_hotplug_callback, NULL, 0 }; + +static bool no_bL_switcher; +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + register_cpu_notifier(&bL_switcher_hotplug_notifier); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 00000000000..5e2dd197e72 --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '4' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a0..4a2b32fd53a 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __cpuc_flush_dcache_area((void *)poke, 8); + outer_clean_range(__pa(poke), __pa(poke + 2)); +} + static const struct mcpm_platform_ops *platform_ops; int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705c4b2..057e9c5a9e1 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point) * position independent way. */ adr r5, 3f - ldmia r5, {r6, r7, r8, r11} + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes add r6, r5, r6 @ r6 = mcpm_entry_vectors ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys add r8, r5, r8 @ r8 = mcpm_sync add r11, r5, r11 @ r11 = first_man_locks + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + mov r0, #MCPM_SYNC_CLUSTER_SIZE mla r8, r0, r10, r8 @ r8 = sync cluster base @@ -195,7 +202,8 @@ mcpm_entry_gated: .align 2 -3: .word mcpm_entry_vectors - . +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b .word mcpm_power_up_setup_phys - 3b .word mcpm_sync - 3b .word first_man_locks - 3b @@ -214,6 +222,10 @@ first_man_locks: ENTRY(mcpm_entry_vectors) .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_power_up_setup_phys, #object ENTRY(mcpm_power_up_setup_phys) .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 00000000000..d60e77d179a --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,67 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/compiler.h> +#include <linux/types.h> + +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id); + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +#ifdef CONFIG_BL_SWITCHER + +int bL_switcher_register_notifier(struct notifier_block *nb); +int bL_switcher_unregister_notifier(struct notifier_block *nb); + +/* + * Use these functions to temporarily prevent enabling/disabling of + * the switcher. + * bL_switcher_get_enabled() returns true if the switcher is currently + * enabled. Each call to bL_switcher_get_enabled() must be followed + * by a call to bL_switcher_put_enabled(). These functions are not + * recursive. + */ +bool bL_switcher_get_enabled(void); +void bL_switcher_put_enabled(void); + +int bL_switcher_trace_trigger(void); + +#else +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +#endif /* CONFIG_BL_SWITCHER */ + +#endif diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index ce4d01c03e6..cedd3721318 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -42,6 +42,8 @@ #define vectors_high() (0) #endif +#ifdef CONFIG_CPU_CP15 + extern unsigned long cr_no_alignment; /* defined in entry-armv.S */ extern unsigned long cr_alignment; /* defined in entry-armv.S */ @@ -96,6 +98,18 @@ static inline void set_copro_access(unsigned int val) isb(); } -#endif +#else /* ifdef CONFIG_CPU_CP15 */ + +/* + * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the + * minds of the developers). Yielding 0 for machines without a cp15 (and making + * it read-only) is fine for most cases and saves quite some #ifdeffery. + */ +#define cr_no_alignment UL(0) +#define cr_alignment UL(0) + +#endif /* ifdef CONFIG_CPU_CP15 / else */ + +#endif /* ifndef __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index ad41ec2471e..7652712d1d1 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -38,6 +38,24 @@ #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_INTEL 0x69 + +#define ARM_CPU_PART_ARM1136 0xB360 +#define ARM_CPU_PART_ARM1156 0xB560 +#define ARM_CPU_PART_ARM1176 0xB760 +#define ARM_CPU_PART_ARM11MPCORE 0xB020 +#define ARM_CPU_PART_CORTEX_A8 0xC080 +#define ARM_CPU_PART_CORTEX_A9 0xC090 +#define ARM_CPU_PART_CORTEX_A5 0xC050 +#define ARM_CPU_PART_CORTEX_A15 0xC0F0 +#define ARM_CPU_PART_CORTEX_A7 0xC070 + +#define ARM_CPU_XSCALE_ARCH_MASK 0xe000 +#define ARM_CPU_XSCALE_ARCH_V1 0x2000 +#define ARM_CPU_XSCALE_ARCH_V2 0x4000 +#define ARM_CPU_XSCALE_ARCH_V3 0x6000 + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 @@ -50,6 +68,7 @@ extern unsigned int processor_id; : "cc"); \ __val; \ }) + #define read_cpuid_ext(ext_reg) \ ({ \ unsigned int __val; \ @@ -59,29 +78,24 @@ extern unsigned int processor_id; : "cc"); \ __val; \ }) -#else -#define read_cpuid(reg) (processor_id) -#define read_cpuid_ext(reg) 0 -#endif -#define ARM_CPU_IMP_ARM 0x41 -#define ARM_CPU_IMP_INTEL 0x69 +#else /* ifdef CONFIG_CPU_CP15 */ -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 +/* + * read_cpuid and read_cpuid_ext should only ever be called on machines that + * have cp15 so warn on other usages. + */ +#define read_cpuid(reg) \ + ({ \ + WARN_ON_ONCE(1); \ + 0; \ + }) -#define ARM_CPU_XSCALE_ARCH_MASK 0xe000 -#define ARM_CPU_XSCALE_ARCH_V1 0x2000 -#define ARM_CPU_XSCALE_ARCH_V2 0x4000 -#define ARM_CPU_XSCALE_ARCH_V3 0x6000 +#define read_cpuid_ext(reg) read_cpuid(reg) + +#endif /* ifdef CONFIG_CPU_CP15 / else */ +#ifdef CONFIG_CPU_CP15 /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -92,6 +106,15 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return read_cpuid(CPUID_ID); } +#else /* ifdef CONFIG_CPU_CP15 */ + +static inline unsigned int __attribute_const__ read_cpuid_id(void) +{ + return processor_id; +} + +#endif /* ifdef CONFIG_CPU_CP15 / else */ + static inline unsigned int __attribute_const__ read_cpuid_implementor(void) { return (read_cpuid_id() & 0xFF000000) >> 24; diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h index 8cacbcda76d..b6e9f2c108b 100644 --- a/arch/arm/include/asm/glue-df.h +++ b/arch/arm/include/asm/glue-df.h @@ -18,12 +18,12 @@ * ================ * * We have the following to choose from: - * arm6 - ARM6 style * arm7 - ARM7 style * v4_early - ARMv4 without Thumb early abort handler * v4t_late - ARMv4 with Thumb late abort handler * v4t_early - ARMv4 with Thumb early abort handler - * v5tej_early - ARMv5 with Thumb and Java early abort handler + * v5t_early - ARMv5 with Thumb early abort handler + * v5tj_early - ARMv5 with Thumb and Java early abort handler * xscale - ARMv5 with Thumb with Xscale extensions * v6_early - ARMv6 generic early abort handler * v7_early - ARMv7 generic early abort handler @@ -39,19 +39,19 @@ # endif #endif -#ifdef CONFIG_CPU_ABRT_LV4T +#ifdef CONFIG_CPU_ABRT_EV4 # ifdef CPU_DABORT_HANDLER # define MULTI_DABORT 1 # else -# define CPU_DABORT_HANDLER v4t_late_abort +# define CPU_DABORT_HANDLER v4_early_abort # endif #endif -#ifdef CONFIG_CPU_ABRT_EV4 +#ifdef CONFIG_CPU_ABRT_LV4T # ifdef CPU_DABORT_HANDLER # define MULTI_DABORT 1 # else -# define CPU_DABORT_HANDLER v4_early_abort +# define CPU_DABORT_HANDLER v4t_late_abort # endif #endif @@ -63,19 +63,19 @@ # endif #endif -#ifdef CONFIG_CPU_ABRT_EV5TJ +#ifdef CONFIG_CPU_ABRT_EV5T # ifdef CPU_DABORT_HANDLER # define MULTI_DABORT 1 # else -# define CPU_DABORT_HANDLER v5tj_early_abort +# define CPU_DABORT_HANDLER v5t_early_abort # endif #endif -#ifdef CONFIG_CPU_ABRT_EV5T +#ifdef CONFIG_CPU_ABRT_EV5TJ # ifdef CPU_DABORT_HANDLER # define MULTI_DABORT 1 # else -# define CPU_DABORT_HANDLER v5t_early_abort +# define CPU_DABORT_HANDLER v5tj_early_abort # endif #endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 2740c2a2df6..3d7351c844a 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 7c3d813e15d..124623e5ef1 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -211,4 +211,8 @@ #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_DABT_S1PTW (1U << 7) +#define HSR_DABT_CM (1U << 8) +#define HSR_DABT_EA (1U << 9) + #endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index e4956f4e23e..18d50322a9e 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -75,7 +75,7 @@ extern char __kvm_hyp_code_end[]; extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); #endif diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index fd611996bfb..82b4babead2 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -22,11 +22,12 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#include <asm/kvm_arm.h> -u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); -u32 *vcpu_spsr(struct kvm_vcpu *vcpu); +unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); -int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run); +bool kvm_condition_valid(struct kvm_vcpu *vcpu); void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -37,14 +38,14 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) return 1; } -static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) { - return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; + return &vcpu->arch.regs.usr_regs.ARM_pc; } -static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) { - return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; + return &vcpu->arch.regs.usr_regs.ARM_cpsr; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) @@ -69,4 +70,96 @@ static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) return reg == 15; } +static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.hsr; +} + +static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.hxfar; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8; +} + +static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.hyp_pc; +} + +static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; +} + +static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_WNR; +} + +static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; +} + +static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA; +} + +static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; +} + +/* Get Access Size from a data abort */ +static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +{ + switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) { + case 0: + return 1; + case 1: + return 2; + case 2: + return 4; + default: + kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); + return -EFAULT; + } +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_IL; +} + +static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) >> HSR_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; +} + +static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index d1736a53b12..0c4e643d939 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -80,6 +80,15 @@ struct kvm_mmu_memory_cache { void *objects[KVM_NR_MEM_OBJS]; }; +struct kvm_vcpu_fault_info { + u32 hsr; /* Hyp Syndrome Register */ + u32 hxfar; /* Hyp Data/Inst. Fault Address Register */ + u32 hpfar; /* Hyp IPA Fault Address Register */ + u32 hyp_pc; /* PC when exception was taken from Hyp mode */ +}; + +typedef struct vfp_hard_struct kvm_kernel_vfp_t; + struct kvm_vcpu_arch { struct kvm_regs regs; @@ -93,13 +102,11 @@ struct kvm_vcpu_arch { u32 midr; /* Exception Information */ - u32 hsr; /* Hyp Syndrome Register */ - u32 hxfar; /* Hyp Data/Inst Fault Address Register */ - u32 hpfar; /* Hyp IPA Fault Address Register */ + struct kvm_vcpu_fault_info fault; /* Floating point registers (VFP and Advanced SIMD/NEON) */ - struct vfp_hard_struct vfp_guest; - struct vfp_hard_struct *vfp_host; + kvm_kernel_vfp_t vfp_guest; + kvm_kernel_vfp_t *vfp_host; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -122,9 +129,6 @@ struct kvm_vcpu_arch { /* Interrupt related fields */ u32 irq_lines; /* IRQ and FIQ levels */ - /* Hyp exception information */ - u32 hyp_pc; /* PC when exception was taken from Hyp mode */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -181,4 +185,26 @@ struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + +static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + unsigned long pgd_low, pgd_high; + + pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); + pgd_high = (pgd_ptr >> 32ULL); + + /* + * Call initialization code, and switch to the full blown + * HYP code. The init code doesn't need to preserve these registers as + * r1-r3 and r12 are already callee save according to the AAPCS. + * Note that we slightly misuse the prototype by casing the pgd_low to + * a void *. + */ + kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 421a20b3487..970f3b5fa10 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -19,6 +19,18 @@ #ifndef __ARM_KVM_MMU_H__ #define __ARM_KVM_MMU_H__ +#include <asm/cacheflush.h> +#include <asm/pgalloc.h> +#include <asm/idmap.h> + +/* + * We directly use the kernel VA for the HYP, as we can directly share + * the mapping (HTTBR "covers" TTBR1). + */ +#define HYP_PAGE_OFFSET_MASK (~0UL) +#define HYP_PAGE_OFFSET PAGE_OFFSET +#define KERN_TO_HYP(kva) (kva) + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_hyp_pmds(void); @@ -36,6 +48,16 @@ phys_addr_t kvm_mmu_get_httbr(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) +{ + pte_val(*pte) = new_pte; + /* + * flush_pmd_entry just takes a void pointer and cleans the necessary + * cache entries, so we can reuse the function for ptes. + */ + flush_pmd_entry(pte); +} + static inline bool kvm_is_write_fault(unsigned long hsr) { unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; @@ -47,4 +69,49 @@ static inline bool kvm_is_write_fault(unsigned long hsr) return true; } +static inline void kvm_clean_pgd(pgd_t *pgd) +{ + clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); +} + +static inline void kvm_clean_pmd_entry(pmd_t *pmd) +{ + clean_pmd_entry(pmd); +} + +static inline void kvm_clean_pte(pte_t *pte) +{ + clean_pte_table(pte); +} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= L_PTE_S2_RDWR; +} + +struct kvm; + +static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +{ + /* + * If we are going to insert an instruction page and the icache is + * either VIPT or PIPT, there is a potential problem where the host + * (or another VM) may have used the same page as this guest, and we + * read incorrect data from the icache. If we're using a PIPT cache, + * we can invalidate just that page, but if we are using a VIPT cache + * we need to invalidate the entire icache - damn shame - as written + * in the ARM ARM (DDI 0406C.b - Page B3-1393). + * + * VIVT caches are tagged using both the ASID and the VMID and doesn't + * need any kind of flushing (DDI 0406C.b - Page B3-1392). + */ + if (icache_is_pipt()) { + unsigned long hva = gfn_to_hva(kvm, gfn); + __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + } else if (!icache_is_vivt_asid_tagged()) { + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h index ab97207d9cd..343744e4809 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/arch/arm/include/asm/kvm_vgic.h @@ -21,7 +21,6 @@ #include <linux/kernel.h> #include <linux/kvm.h> -#include <linux/kvm_host.h> #include <linux/irqreturn.h> #include <linux/spinlock.h> #include <linux/types.h> diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a..7626a7fd493 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); /* + * This sets an early poke i.e a value to be poked into some address + * from very early assembly code before the CPU is ungated. The + * address must be physical, and if 0 then nothing will happen. + */ +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val); + +/* * CPU/cluster power operations API for higher subsystems to use. */ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22bebe6c..610ccf33f5e 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern int register_ipi_completion(struct completion *completion, int cpu); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 023bfeb367b..c1ee007523d 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -53,12 +53,12 @@ #define KVM_ARM_FIQ_spsr fiq_regs[7] struct kvm_regs { - struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ - __u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ - __u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ - __u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */ - __u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ - __u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ + struct pt_regs usr_regs; /* R0_usr - R14_usr, PC, CPSR */ + unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ + unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ + unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */ + unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ + unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ }; /* Supported Processor Types */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 3f088225e71..a53efa99369 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -169,10 +169,10 @@ int main(void) DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); - DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr)); - DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.hxfar)); - DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar)); - DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.hyp_pc)); + DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); + DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); + DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); + DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); #ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 854bd22380d..5b391a689b4 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -98,8 +98,9 @@ __mmap_switched: str r9, [r4] @ Save processor ID str r1, [r5] @ Save machine type str r2, [r6] @ Save atags pointer - bic r4, r0, #CR_A @ Clear 'A' bit - stmia r7, {r0, r4} @ Save control register values + cmp r7, #0 + bicne r4, r0, #CR_A @ Clear 'A' bit + stmneia r7, {r0, r4} @ Save control register values b start_kernel ENDPROC(__mmap_switched) @@ -113,7 +114,11 @@ __mmap_switched_data: .long processor_id @ r4 .long __machine_arch_type @ r5 .long __atags_pointer @ r6 +#ifdef CONFIG_CPU_CP15 .long cr_alignment @ r7 +#else + .long 0 @ r7 +#endif .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 2c228a07e58..6a2e09c952c 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -32,15 +32,21 @@ * numbers for r1. * */ - .arm __HEAD + +#ifdef CONFIG_CPU_THUMBONLY + .thumb +ENTRY(stext) +#else + .arm ENTRY(stext) THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) +#endif setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode @ and irqs disabled diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f0d5e969ef..7c9fd36be1d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -303,10 +303,10 @@ static int cpu_has_aliasing_icache(unsigned int arch) static void __init cacheid_init(void) { - unsigned int cachetype = read_cpuid_cachetype(); unsigned int arch = cpu_architecture(); if (arch >= CPU_ARCH_ARMv6) { + unsigned int cachetype = read_cpuid_cachetype(); if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ arch = CPU_ARCH_ARMv7; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1f2ccccaf00..f40fb0dcb3b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -66,6 +66,7 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_COMPLETION, }; static DECLARE_COMPLETION(cpu_running); @@ -429,6 +430,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_COMPLETION, "completion interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -554,6 +556,19 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +static DEFINE_PER_CPU(struct completion *, cpu_completion); + +int register_ipi_completion(struct completion *completion, int cpu) +{ + per_cpu(cpu_completion, cpu) = completion; + return IPI_COMPLETION; +} + +static void ipi_complete(unsigned int cpu) +{ + complete(per_cpu(cpu_completion, cpu)); +} + /* * Main handler for inter-processor interrupts */ @@ -604,6 +619,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_COMPLETION: + irq_enter(); + ipi_complete(cpu); + irq_exit(); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 45eac87ed66..5bc1a63284e 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -41,7 +41,7 @@ void scu_enable(void __iomem *scu_base) #ifdef CONFIG_ARM_ERRATA_764369 /* Cortex-A9 only */ - if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) { + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { scu_ctrl = __raw_readl(scu_base + 0x30); if (!(scu_ctrl & 1)) __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index fc96ce6f235..8dc5e76cb78 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -17,7 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o -obj-y += arm.o guest.o mmu.o emulate.o reset.o +obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c1fe498983a..a0dfc2a53f9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -30,11 +30,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#include <asm/unified.h> #include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/mman.h> -#include <asm/cputype.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <asm/virt.h> @@ -44,14 +42,13 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_psci.h> -#include <asm/opcodes.h> #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); #endif static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static struct vfp_hard_struct __percpu *kvm_host_vfp_state; +static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state; static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. */ @@ -304,22 +301,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -int __attribute_const__ kvm_target_cpu(void) -{ - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { - case ARM_CPU_PART_CORTEX_A15: - return KVM_ARM_TARGET_CORTEX_A15; - default: - return -EINVAL; - } -} - int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { int ret; @@ -482,163 +463,6 @@ static void update_vttbr(struct kvm *kvm) spin_unlock(&kvm_vmid_lock); } -static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* SVC called from Hyp mode should never get here */ - kvm_debug("SVC called from Hyp mode shouldn't go here\n"); - BUG(); - return -EINVAL; /* Squash warning */ -} - -static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), - vcpu->arch.hsr & HSR_HVC_IMM_MASK); - - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* The hypervisor should never cause aborts */ - kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n", - vcpu->arch.hxfar, vcpu->arch.hsr); - return -EFAULT; -} - -static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* This is either an error in the ws. code or an external abort */ - kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n", - vcpu->arch.hxfar, vcpu->arch.hsr); - return -EFAULT; -} - -typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); -static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, - [HSR_EC_CP15_32] = kvm_handle_cp15_32, - [HSR_EC_CP15_64] = kvm_handle_cp15_64, - [HSR_EC_CP14_MR] = kvm_handle_cp14_access, - [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, - [HSR_EC_CP14_64] = kvm_handle_cp14_access, - [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, - [HSR_EC_CP10_ID] = kvm_handle_cp10_id, - [HSR_EC_SVC_HYP] = handle_svc_hyp, - [HSR_EC_HVC] = handle_hvc, - [HSR_EC_SMC] = handle_smc, - [HSR_EC_IABT] = kvm_handle_guest_abort, - [HSR_EC_IABT_HYP] = handle_pabt_hyp, - [HSR_EC_DABT] = kvm_handle_guest_abort, - [HSR_EC_DABT_HYP] = handle_dabt_hyp, -}; - -/* - * A conditional instruction is allowed to trap, even though it - * wouldn't be executed. So let's re-implement the hardware, in - * software! - */ -static bool kvm_condition_valid(struct kvm_vcpu *vcpu) -{ - unsigned long cpsr, cond, insn; - - /* - * Exception Code 0 can only happen if we set HCR.TGE to 1, to - * catch undefined instructions, and then we won't get past - * the arm_exit_handlers test anyway. - */ - BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0); - - /* Top two bits non-zero? Unconditional. */ - if (vcpu->arch.hsr >> 30) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - /* Is condition field valid? */ - if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT) - cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT; - else { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - /* Shift makes it look like an ARM-mode instruction */ - insn = cond << 28; - return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; -} - -/* - * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on - * proper exit to QEMU. - */ -static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index) -{ - unsigned long hsr_ec; - - switch (exception_index) { - case ARM_EXCEPTION_IRQ: - return 1; - case ARM_EXCEPTION_UNDEFINED: - kvm_err("Undefined exception in Hyp mode at: %#08x\n", - vcpu->arch.hyp_pc); - BUG(); - panic("KVM: Hypervisor undefined exception!\n"); - case ARM_EXCEPTION_DATA_ABORT: - case ARM_EXCEPTION_PREF_ABORT: - case ARM_EXCEPTION_HVC: - hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT; - - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) - || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unkown exception class: %#08lx, " - "hsr: %#08x\n", hsr_ec, - (unsigned int)vcpu->arch.hsr); - BUG(); - } - - /* - * See ARM ARM B1.14.1: "Hyp traps on instructions - * that fail their condition code check" - */ - if (!kvm_condition_valid(vcpu)) { - bool is_wide = vcpu->arch.hsr & HSR_IL; - kvm_skip_instr(vcpu, is_wide); - return 1; - } - - return arm_exit_handlers[hsr_ec](vcpu, run); - default: - kvm_pr_unimpl("Unsupported exception type: %d", - exception_index); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - return 0; - } -} - static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { if (likely(vcpu->arch.has_run_once)) @@ -973,7 +797,6 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *vector) { unsigned long long pgd_ptr; - unsigned long pgd_low, pgd_high; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -982,20 +805,11 @@ static void cpu_init_hyp_mode(void *vector) __hyp_set_vectors((unsigned long)vector); pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); - pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); - pgd_high = (pgd_ptr >> 32ULL); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - /* - * Call initialization code, and switch to the full blown - * HYP code. The init code doesn't need to preserve these registers as - * r1-r3 and r12 are already callee save according to the AAPCS. - * Note that we slightly misuse the prototype by casing the pgd_low to - * a void *. - */ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); } /** @@ -1078,7 +892,7 @@ static int init_hyp_mode(void) /* * Map the host VFP structures */ - kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct); + kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t); if (!kvm_host_vfp_state) { err = -ENOMEM; kvm_err("Cannot allocate host VFP state\n"); @@ -1086,7 +900,7 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - struct vfp_hard_struct *vfp; + kvm_kernel_vfp_t *vfp; vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); err = create_hyp_mappings(vfp, vfp + 1); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 7bed7556077..8eea97be1ed 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -76,7 +76,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - u32 val; + unsigned long val; int cpu; if (!p->is_write) @@ -293,12 +293,12 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } /* If access function fails, it should complain. */ } else { - kvm_err("Unsupported guest CP15 access at: %08x\n", + kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); } @@ -315,14 +315,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (vcpu->arch.hsr >> 1) & 0xf; - params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf; - params.is_write = ((vcpu->arch.hsr & 1) == 0); + params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = true; - params.Op1 = (vcpu->arch.hsr >> 16) & 0xf; + params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf; params.Op2 = 0; - params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf; + params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; params.CRn = 0; return emulate_cp15(vcpu, ¶ms); @@ -347,14 +347,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (vcpu->arch.hsr >> 1) & 0xf; - params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf; - params.is_write = ((vcpu->arch.hsr & 1) == 0); + params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = false; - params.CRn = (vcpu->arch.hsr >> 10) & 0xf; - params.Op1 = (vcpu->arch.hsr >> 14) & 0x7; - params.Op2 = (vcpu->arch.hsr >> 17) & 0x7; + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; + params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7; + params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7; params.Rt2 = 0; return emulate_cp15(vcpu, ¶ms); diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 992adfafa2f..b7301d3e479 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, static inline bool write_to_read_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug("CP15 write to read-only register at: %08x\n", + kvm_debug("CP15 write to read-only register at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); return false; @@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu, static inline bool read_from_write_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug("CP15 read to write-only register at: %08x\n", + kvm_debug("CP15 read to write-only register at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); return false; diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d61450ac666..bdede9e7da5 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -20,6 +20,7 @@ #include <linux/kvm_host.h> #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +#include <asm/opcodes.h> #include <trace/events/kvm.h> #include "trace.h" @@ -109,10 +110,10 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { * Return a pointer to the register number valid in the current mode of * the virtual CPU. */ -u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) { - u32 *reg_array = (u32 *)&vcpu->arch.regs; - u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { case USR_MODE...SVC_MODE: @@ -141,9 +142,9 @@ u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) /* * Return the SPSR for the current mode of the virtual CPU. */ -u32 *vcpu_spsr(struct kvm_vcpu *vcpu) +unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) { - u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { case SVC_MODE: return &vcpu->arch.regs.KVM_ARM_SVC_spsr; @@ -160,20 +161,48 @@ u32 *vcpu_spsr(struct kvm_vcpu *vcpu) } } -/** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest - * @vcpu: the vcpu pointer - * @run: the kvm_run structure pointer - * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. +/* + * A conditional instruction is allowed to trap, even though it + * wouldn't be executed. So let's re-implement the hardware, in + * software! */ -int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +bool kvm_condition_valid(struct kvm_vcpu *vcpu) { - trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); - return 1; + unsigned long cpsr, cond, insn; + + /* + * Exception Code 0 can only happen if we set HCR.TGE to 1, to + * catch undefined instructions, and then we won't get past + * the arm_exit_handlers test anyway. + */ + BUG_ON(!kvm_vcpu_trap_get_class(vcpu)); + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + /* Is condition field valid? */ + if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT) + cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT; + else { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + /* Shift makes it look like an ARM-mode instruction */ + insn = cond << 28; + return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; } /** @@ -257,9 +286,9 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - u32 new_lr_value; - u32 new_spsr_value; - u32 cpsr = *vcpu_cpsr(vcpu); + unsigned long new_lr_value; + unsigned long new_spsr_value; + unsigned long cpsr = *vcpu_cpsr(vcpu); u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset = 4; @@ -291,9 +320,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - u32 new_lr_value; - u32 new_spsr_value; - u32 cpsr = *vcpu_cpsr(vcpu); + unsigned long new_lr_value; + unsigned long new_spsr_value; + unsigned long cpsr = *vcpu_cpsr(vcpu); u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 2339d9609d3..152d0361218 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <asm/cputype.h> #include <asm/uaccess.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> @@ -180,6 +181,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A15: + return KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c new file mode 100644 index 00000000000..26ad17310a1 --- /dev/null +++ b/arch/arm/kvm/handle_exit.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_psci.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +#include "trace.h" + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* SVC called from Hyp mode should never get here */ + kvm_debug("SVC called from Hyp mode shouldn't go here\n"); + BUG(); + return -EINVAL; /* Squash warning */ +} + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), + kvm_vcpu_hvc_get_imm(vcpu)); + + if (kvm_psci_call(vcpu)) + return 1; + + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (kvm_psci_call(vcpu)) + return 1; + + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* The hypervisor should never cause aborts */ + kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", + kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; +} + +static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* This is either an error in the ws. code or an external abort */ + kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", + kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; +} + +/** + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * @vcpu: the vcpu pointer + * @run: the kvm_run structure pointer + * + * Simply sets the wait_for_interrupts flag on the vcpu structure, which will + * halt execution of world-switches and schedule other host processes until + * there is an incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + trace_kvm_wfi(*vcpu_pc(vcpu)); + kvm_vcpu_block(vcpu); + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_CP15_32] = kvm_handle_cp15_32, + [HSR_EC_CP15_64] = kvm_handle_cp15_64, + [HSR_EC_CP14_MR] = kvm_handle_cp14_access, + [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, + [HSR_EC_CP14_64] = kvm_handle_cp14_access, + [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, + [HSR_EC_CP10_ID] = kvm_handle_cp10_id, + [HSR_EC_SVC_HYP] = handle_svc_hyp, + [HSR_EC_HVC] = handle_hvc, + [HSR_EC_SMC] = handle_smc, + [HSR_EC_IABT] = kvm_handle_guest_abort, + [HSR_EC_IABT_HYP] = handle_pabt_hyp, + [HSR_EC_DABT] = kvm_handle_guest_abort, + [HSR_EC_DABT_HYP] = handle_dabt_hyp, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unkown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_UNDEFINED: + kvm_err("Undefined exception in Hyp mode at: %#08lx\n", + kvm_vcpu_get_hyp_pc(vcpu)); + BUG(); + panic("KVM: Hypervisor undefined exception!\n"); + case ARM_EXCEPTION_DATA_ABORT: + case ARM_EXCEPTION_PREF_ABORT: + case ARM_EXCEPTION_HVC: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 8ca87ab0919..f7793df62f5 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -35,15 +35,18 @@ __kvm_hyp_code_start: /******************************************************************** * Flush per-VMID TLBs * - * void __kvm_tlb_flush_vmid(struct kvm *kvm); + * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); * * We rely on the hardware to broadcast the TLB invalidation to all CPUs * inside the inner-shareable domain (which is the case for all v7 * implementations). If we come across a non-IS SMP implementation, we'll * have to use an IPI based mechanism. Until then, we stick to the simple * hardware assisted version. + * + * As v7 does not support flushing per IPA, just nuke the whole TLB + * instead, ignoring the ipa value. */ -ENTRY(__kvm_tlb_flush_vmid) +ENTRY(__kvm_tlb_flush_vmid_ipa) push {r2, r3} add r0, r0, #KVM_VTTBR @@ -60,7 +63,7 @@ ENTRY(__kvm_tlb_flush_vmid) pop {r2, r3} bx lr -ENDPROC(__kvm_tlb_flush_vmid) +ENDPROC(__kvm_tlb_flush_vmid_ipa) /******************************************************************** * Flush TLBs and instruction caches of all CPUs inside the inner-shareable @@ -235,9 +238,9 @@ ENTRY(kvm_call_hyp) * instruction is issued since all traps are disabled when running the host * kernel as per the Hyp-mode initialization at boot time. * - * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc + * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc * below) when the HVC instruction is called from SVC mode (i.e. a guest or the - * host kernel) and they cause a trap to the vector page + offset 0xc when HVC + * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC * instructions are called from within Hyp-mode. * * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 98a870ff1a5..72a12f2171b 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -33,16 +33,16 @@ */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { - __u32 *dest; + unsigned long *dest; unsigned int len; int mask; if (!run->mmio.is_write) { dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - memset(dest, 0, sizeof(int)); + *dest = 0; len = run->mmio.len; - if (len > 4) + if (len > sizeof(unsigned long)) return -EINVAL; memcpy(dest, run->mmio.data, len); @@ -50,7 +50,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, *((u64 *)run->mmio.data)); - if (vcpu->arch.mmio_decode.sign_extend && len < 4) { + if (vcpu->arch.mmio_decode.sign_extend && + len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); *dest = (*dest ^ mask) - mask; } @@ -65,40 +66,29 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long rt, len; bool is_write, sign_extend; - if ((vcpu->arch.hsr >> 8) & 1) { + if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ - kvm_inject_dabt(vcpu, vcpu->arch.hxfar); + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; } - if ((vcpu->arch.hsr >> 7) & 1) { + if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ - kvm_inject_dabt(vcpu, vcpu->arch.hxfar); + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; } - switch ((vcpu->arch.hsr >> 22) & 0x3) { - case 0: - len = 1; - break; - case 1: - len = 2; - break; - case 2: - len = 4; - break; - default: - kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); - return -EFAULT; - } + len = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(len < 0)) + return len; - is_write = vcpu->arch.hsr & HSR_WNR; - sign_extend = vcpu->arch.hsr & HSR_SSE; - rt = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT; + is_write = kvm_vcpu_dabt_iswrite(vcpu); + sign_extend = kvm_vcpu_dabt_issext(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); if (kvm_vcpu_reg_is_pc(vcpu, rt)) { /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, vcpu->arch.hxfar); + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; } @@ -112,7 +102,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 0; } @@ -130,7 +120,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, * space do its magic. */ - if (vcpu->arch.hsr & HSR_ISV) { + if (kvm_vcpu_dabt_isvalid(vcpu)) { ret = decode_hsr(vcpu, fault_ipa, &mmio); if (ret) return ret; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 99e07c7dd74..2f12e405640 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -20,7 +20,6 @@ #include <linux/kvm_host.h> #include <linux/io.h> #include <trace/events/kvm.h> -#include <asm/idmap.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> @@ -28,8 +27,6 @@ #include <asm/kvm_mmio.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> -#include <asm/mach/map.h> -#include <trace/events/kvm.h> #include "trace.h" @@ -37,19 +34,9 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); -static void kvm_tlb_flush_vmid(struct kvm *kvm) +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); -} - -static void kvm_set_pte(pte_t *pte, pte_t new_pte) -{ - pte_val(*pte) = new_pte; - /* - * flush_pmd_entry just takes a void pointer and cleans the necessary - * cache entries, so we can reuse the function for ptes. - */ - flush_pmd_entry(pte); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, @@ -98,33 +85,42 @@ static void free_ptes(pmd_t *pmd, unsigned long addr) } } +static void free_hyp_pgd_entry(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pgd = hyp_pgd + pgd_index(hyp_addr); + pud = pud_offset(pgd, hyp_addr); + + if (pud_none(*pud)) + return; + BUG_ON(pud_bad(*pud)); + + pmd = pmd_offset(pud, hyp_addr); + free_ptes(pmd, addr); + pmd_free(NULL, pmd); + pud_clear(pud); +} + /** * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables * * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * only mappings in the kernel memory area, which is above PAGE_OFFSET. + * either mappings in the kernel memory area (above PAGE_OFFSET), or + * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). */ void free_hyp_pmds(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; unsigned long addr; mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) { - pgd = hyp_pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - - if (pud_none(*pud)) - continue; - BUG_ON(pud_bad(*pud)); - - pmd = pmd_offset(pud, addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); - } + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + free_hyp_pgd_entry(addr); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + free_hyp_pgd_entry(addr); mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -136,7 +132,9 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, struct page *page; for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - pte = pte_offset_kernel(pmd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pte = pte_offset_kernel(pmd, hyp_addr); BUG_ON(!virt_addr_valid(addr)); page = virt_to_page(addr); kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); @@ -151,7 +149,9 @@ static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, unsigned long addr; for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - pte = pte_offset_kernel(pmd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pte = pte_offset_kernel(pmd, hyp_addr); BUG_ON(pfn_valid(*pfn_base)); kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); (*pfn_base)++; @@ -166,12 +166,13 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, unsigned long addr, next; for (addr = start; addr < end; addr = next) { - pmd = pmd_offset(pud, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + pmd = pmd_offset(pud, hyp_addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, addr); + pte = pte_alloc_one_kernel(NULL, hyp_addr); if (!pte) { kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; @@ -206,17 +207,23 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) unsigned long addr, next; int err = 0; - BUG_ON(start > end); - if (start < PAGE_OFFSET) + if (start >= end) + return -EINVAL; + /* Check for a valid kernel memory mapping */ + if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) + return -EINVAL; + /* Check for a valid kernel IO mapping */ + if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) return -EINVAL; mutex_lock(&kvm_hyp_pgd_mutex); for (addr = start; addr < end; addr = next) { - pgd = hyp_pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + pgd = hyp_pgd + pgd_index(hyp_addr); + pud = pud_offset(pgd, hyp_addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, addr); + pmd = pmd_alloc_one(NULL, hyp_addr); if (!pmd) { kvm_err("Cannot allocate Hyp pmd\n"); err = -ENOMEM; @@ -236,12 +243,13 @@ out: } /** - * create_hyp_mappings - map a kernel virtual address range in Hyp mode + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range * @to: The virtual kernel end address of the range (exclusive) * - * The same virtual address as the kernel virtual address is also used in - * Hyp-mode mapping to the same underlying physical pages. + * The same virtual address as the kernel virtual address is also used + * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying + * physical pages. * * Note: Wrapping around zero in the "to" address is not supported. */ @@ -251,10 +259,13 @@ int create_hyp_mappings(void *from, void *to) } /** - * create_hyp_io_mappings - map a physical IO range in Hyp mode - * @from: The virtual HYP start address of the range - * @to: The virtual HYP end address of the range (exclusive) + * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode + * @from: The kernel start VA of the range + * @to: The kernel end VA of the range (exclusive) * @addr: The physical start address which gets mapped + * + * The resulting HYP VA is the same as the kernel VA, modulo + * HYP_PAGE_OFFSET. */ int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) { @@ -290,7 +301,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); - clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); + kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; @@ -422,22 +433,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; /* ignore calls from kvm_set_spte_hva */ pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); - pmd += pmd_index(addr); get_page(virt_to_page(pud)); - } else - pmd = pmd_offset(pud, addr); + } + + pmd = pmd_offset(pud, addr); /* Create 2nd stage page table mapping - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ pte = mmu_memory_cache_alloc(cache); - clean_pte_table(pte); + kvm_clean_pte(pte); pmd_populate_kernel(NULL, pmd, pte); - pte += pte_index(addr); get_page(virt_to_page(pmd)); - } else - pte = pte_offset_kernel(pmd, addr); + } + + pte = pte_offset_kernel(pmd, addr); if (iomap && pte_present(*pte)) return -EFAULT; @@ -446,7 +457,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, old_pte = *pte; kvm_set_pte(pte, *new_pte); if (pte_present(old_pte)) - kvm_tlb_flush_vmid(kvm); + kvm_tlb_flush_vmid_ipa(kvm, addr); else get_page(virt_to_page(pte)); @@ -473,7 +484,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pfn = __phys_to_pfn(pa); for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { - pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR); + pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); + kvm_set_s2pte_writable(&pte); ret = mmu_topup_memory_cache(&cache, 2, 2); if (ret) @@ -492,29 +504,6 @@ out: return ret; } -static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) -{ - /* - * If we are going to insert an instruction page and the icache is - * either VIPT or PIPT, there is a potential problem where the host - * (or another VM) may have used the same page as this guest, and we - * read incorrect data from the icache. If we're using a PIPT cache, - * we can invalidate just that page, but if we are using a VIPT cache - * we need to invalidate the entire icache - damn shame - as written - * in the ARM ARM (DDI 0406C.b - Page B3-1393). - * - * VIVT caches are tagged using both the ASID and the VMID and doesn't - * need any kind of flushing (DDI 0406C.b - Page B3-1392). - */ - if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); - } else if (!icache_is_vivt_asid_tagged()) { - /* any kind of VIPT cache */ - __flush_icache_all(); - } -} - static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn, struct kvm_memory_slot *memslot, unsigned long fault_status) @@ -526,7 +515,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long mmu_seq; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; - write_fault = kvm_is_write_fault(vcpu->arch.hsr); + write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; @@ -560,7 +549,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; if (writable) { - pte_val(new_pte) |= L_PTE_S2_RDWR; + kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); @@ -585,7 +574,6 @@ out_unlock: */ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long hsr_ec; unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; @@ -593,18 +581,17 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; - hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT; - is_iabt = (hsr_ec == HSR_EC_IABT); - fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr, - vcpu->arch.hxfar, fault_ipa); + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE); + fault_status = kvm_vcpu_trap_get_fault(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n", - hsr_ec, fault_status); + kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), fault_status); return -EFAULT; } @@ -614,7 +601,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { if (is_iabt) { /* Prefetch Abort on I/O address */ - kvm_inject_pabt(vcpu, vcpu->arch.hxfar); + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); ret = 1; goto out_unlock; } @@ -626,8 +613,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - /* Adjust page offset */ - fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK; + /* + * The IPA is reported as [MAX:12], so we need to + * complement it with the bottom 12 bits from the + * faulting VA. This is always 12 bits, irrespective + * of the page size. + */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); ret = io_mem_abort(vcpu, run, fault_ipa); goto out_unlock; } @@ -682,7 +674,7 @@ static void handle_hva_to_gpa(struct kvm *kvm, static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); - kvm_tlb_flush_vmid(kvm); + kvm_tlb_flush_vmid_ipa(kvm, gpa); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -776,7 +768,7 @@ void kvm_clear_hyp_idmap(void) pmd = pmd_offset(pud, addr); pud_clear(pud); - clean_pmd_entry(pmd); + kvm_clean_pmd_entry(pmd); pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); } while (pgd++, addr = next, addr < end); } diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index 0e4cfe123b3..17c5ac7d10e 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -1477,7 +1477,7 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) if (addr & ~KVM_PHYS_MASK) return -E2BIG; - if (addr & ~PAGE_MASK) + if (addr & (SZ_4K - 1)) return -EINVAL; mutex_lock(&kvm->lock); diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 8a68f1ec66b..577298ed5a4 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -300,7 +300,7 @@ void __init omap3xxx_check_revision(void) * If the processor type is Cortex-A8 and the revision is 0x0 * it means its Cortex r0p0 which is 3430 ES1.0. */ - cpuid = read_cpuid(CPUID_ID); + cpuid = read_cpuid_id(); if ((((cpuid >> 4) & 0xfff) == 0xc08) && ((cpuid & 0xf) == 0x0)) { omap_revision = OMAP3430_REV_ES1_0; cpu_rev = "1.0"; @@ -460,7 +460,7 @@ void __init omap4xxx_check_revision(void) * Use ARM register to detect the correct ES version */ if (!rev && (hawkeye != 0xb94e) && (hawkeye != 0xb975)) { - idcode = read_cpuid(CPUID_ID); + idcode = read_cpuid_id(); rev = (idcode & 0xf) - 1; } diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index e7a449758ab..f0897732a96 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -202,7 +202,7 @@ static void __init omap4_smp_init_cpus(void) unsigned int i = 0, ncores = 1, cpu_id; /* Use ARM cpuid check here, as SoC detection will not work so early */ - cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; + cpu_id = read_cpuid_id() & CPU_MASK; if (cpu_id == CPU_CORTEX_A9) { /* * Currently we can't call ioremap here because diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 4045c4931a3..35955b54944 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -397,6 +397,13 @@ config CPU_V7 select CPU_PABRT_V7 select CPU_TLB_V7 if MMU +config CPU_THUMBONLY + bool + # There are no CPUs available with MMU that don't implement an ARM ISA: + depends on !MMU + help + Select this if your CPU doesn't support the 32 bit ARM instructions. + # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. config CPU_32v3 @@ -605,7 +612,7 @@ config ARCH_DMA_ADDR_T_64BIT bool config ARM_THUMB - bool "Support Thumb user binaries" + bool "Support Thumb user binaries" if !CPU_THUMBONLY depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || CPU_V7 || CPU_FEROCEON default y help diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index db26e2e543f..6f4585b8907 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -961,12 +961,14 @@ static int __init alignment_init(void) return -ENOMEM; #endif +#ifdef CONFIG_CPU_CP15 if (cpu_is_v6_unaligned()) { cr_alignment &= ~CR_A; cr_no_alignment &= ~CR_A; set_cr(cr_alignment); ai_usermode = safe_usermode(ai_usermode, false); } +#endif hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, "alignment exception"); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index a84ff763ac3..e0d8565671a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -113,6 +113,7 @@ static struct cachepolicy cache_policies[] __initdata = { } }; +#ifdef CONFIG_CPU_CP15 /* * These are useful for identifying cache coherency * problems by allowing the cache or the cache and @@ -211,6 +212,22 @@ void adjust_cr(unsigned long mask, unsigned long set) } #endif +#else /* ifdef CONFIG_CPU_CP15 */ + +static int __init early_cachepolicy(char *p) +{ + pr_warning("cachepolicy kernel parameter not supported without cp15\n"); +} +early_param("cachepolicy", early_cachepolicy); + +static int __init noalign_setup(char *__unused) +{ + pr_warning("noalign kernel parameter not supported without cp15\n"); +} +__setup("noalign", noalign_setup); + +#endif /* ifdef CONFIG_CPU_CP15 / else */ + #define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index e2931de32e6..5533a88c96a 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; + raw_spin_lock(&irq_controller_lock); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; - - raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); raw_spin_unlock(&irq_controller_lock); @@ -652,7 +651,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic) void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; - unsigned long map = 0; + unsigned long flags, map = 0; + + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) @@ -666,9 +667,145 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* this always happens on GIC0 */ writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } #endif +#ifdef CONFIG_BL_SWITCHER +/* + * gic_send_sgi - send a SGI directly to given CPU interface number + * + * cpu_id: the ID for the destination CPU interface + * irq: the IPI number to send a SGI for + */ +void gic_send_sgi(unsigned int cpu_id, unsigned int irq) +{ + BUG_ON(cpu_id >= NR_GIC_CPU_IF); + cpu_id = 1 << cpu_id; + /* this always happens on GIC0 */ + writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); +} + +/* + * gic_get_cpu_id - get the CPU interface ID for the specified CPU + * + * @cpu: the logical CPU number to get the GIC ID for. + * + * Return the CPU interface ID for the given logical CPU number, + * or -1 if the CPU number is too large or the interface ID is + * unknown (more than one bit set). + */ +int gic_get_cpu_id(unsigned int cpu) +{ + unsigned int cpu_bit; + + if (cpu >= NR_GIC_CPU_IF) + return -1; + cpu_bit = gic_cpu_map[cpu]; + if (cpu_bit & (cpu_bit - 1)) + return -1; + return __ffs(cpu_bit); +} + +/* + * gic_migrate_target - migrate IRQs to another PU interface + * + * @new_cpu_id: the CPU target ID to migrate IRQs to + * + * Migrate all peripheral interrupts with a target matching the current CPU + * to the interface corresponding to @new_cpu_id. The CPU interface mapping + * is also updated. Targets to other CPU interfaces are unchanged. + * This must be called with IRQs locally disabled. + */ +void gic_migrate_target(unsigned int new_cpu_id) +{ + unsigned int old_cpu_id, gic_irqs, gic_nr = 0; + void __iomem *dist_base; + int i, ror_val, cpu = smp_processor_id(); + u32 val, old_mask, active_mask; + + if (gic_nr >= MAX_GIC_NR) + BUG(); + + dist_base = gic_data_dist_base(&gic_data[gic_nr]); + if (!dist_base) + return; + gic_irqs = gic_data[gic_nr].gic_irqs; + + old_cpu_id = __ffs(gic_cpu_map[cpu]); + old_mask = 0x01010101 << old_cpu_id; + ror_val = (old_cpu_id - new_cpu_id) & 31; + + raw_spin_lock(&irq_controller_lock); + + gic_cpu_map[cpu] = 1 << new_cpu_id; + + for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) { + val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4); + active_mask = val & old_mask; + if (active_mask) { + val &= ~active_mask; + val |= ror32(active_mask, ror_val); + writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4); + } + } + + raw_spin_unlock(&irq_controller_lock); + + /* + * Now let's migrate and clear any potential SGIs that might be + * pending for us (old_cpu_id). Since GIC_DIST_SGI_PENDING_SET + * is a banked register, we can only forward the SGI using + * GIC_DIST_SOFTINT. The original SGI source is lost but Linux + * doesn't use that information anyway. + * + * For the same reason we do not adjust SGI source information + * for previously sent SGIs by us to other CPUs either. + */ + for (i = 0; i < 16; i += 4) { + int j; + val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i); + if (!val) + continue; + writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i); + for (j = i; j < i + 4; j++) { + if (val & 0xff) + writel_relaxed((1 << (new_cpu_id + 16)) | j, + dist_base + GIC_DIST_SOFTINT); + val >>= 8; + } + } +} + +/* + * gic_get_sgir_physaddr - get the physical address for the SGI register + * + * REturn the physical address of the SGI register to be used + * by some early assembly code when the kernel is not yet available. + */ +static unsigned long gic_dist_physaddr; + +unsigned long gic_get_sgir_physaddr(void) +{ + if (!gic_dist_physaddr) + return 0; + return gic_dist_physaddr + GIC_DIST_SOFTINT; +} + +void __init gic_init_physaddr(struct device_node *node) +{ + struct resource res; + if (of_address_to_resource(node, 0, &res) == 0) { + gic_dist_physaddr = res.start; + pr_info("GIC physical location is %#lx\n", gic_dist_physaddr); + } +} + +#else +#define gic_init_physaddr(node) do { } while(0) +#endif + static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { @@ -850,6 +987,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent) percpu_offset = 0; gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); + if (!gic_cnt) + gic_init_physaddr(node); if (parent) { irq = irq_of_parse_and_map(node, 0); diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index b5696108c06..40643ca79cd 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -31,6 +31,8 @@ #define GIC_DIST_TARGET 0x800 #define GIC_DIST_CONFIG 0xc00 #define GIC_DIST_SOFTINT 0xf00 +#define GIC_DIST_SGI_PENDING_CLEAR 0xf10 +#define GIC_DIST_SGI_PENDING_SET 0xf20 #define GICH_HCR 0x0 #define GICH_VTR 0x4 @@ -75,6 +77,11 @@ static inline void gic_init(unsigned int nr, int start, gic_init_bases(nr, start, dist, cpu, 0, NULL); } +void gic_send_sgi(unsigned int cpu_id, unsigned int irq); +int gic_get_cpu_id(unsigned int cpu); +void gic_migrate_target(unsigned int new_cpu_id); +unsigned long gic_get_sgir_physaddr(void); + #endif /* __ASSEMBLY */ #endif diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h new file mode 100644 index 00000000000..f76dd4de625 --- /dev/null +++ b/include/trace/events/power_cpu_migrate.h @@ -0,0 +1,67 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power + +#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_CPU_MIGRATE_H + +#include <linux/tracepoint.h> + +#define __cpu_migrate_proto \ + TP_PROTO(u64 timestamp, \ + u32 cpu_hwid) +#define __cpu_migrate_args \ + TP_ARGS(timestamp, \ + cpu_hwid) + +DECLARE_EVENT_CLASS(cpu_migrate, + + __cpu_migrate_proto, + __cpu_migrate_args, + + TP_STRUCT__entry( + __field(u64, timestamp ) + __field(u32, cpu_hwid ) + ), + + TP_fast_assign( + __entry->timestamp = timestamp; + __entry->cpu_hwid = cpu_hwid; + ), + + TP_printk("timestamp=%llu cpu_hwid=0x%08lX", + (unsigned long long)__entry->timestamp, + (unsigned long)__entry->cpu_hwid + ) +); + +#define __define_cpu_migrate_event(name) \ + DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \ + __cpu_migrate_proto, \ + __cpu_migrate_args \ + ) + +__define_cpu_migrate_event(begin); +__define_cpu_migrate_event(finish); +__define_cpu_migrate_event(current); + +#undef __define_cpu_migrate +#undef __cpu_migrate_proto +#undef __cpu_migrate_args + +/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */ +#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING +#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING + +/* + * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate + * a whole-cluster migration: + */ +#define CPU_MIGRATE_ALL_CPUS 0x80000000U +#endif + +#endif /* _TRACE_POWER_CPU_MIGRATE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE power_cpu_migrate +#include <trace/define_trace.h> |