about summary refs log tree commit diff
path: root/arch/arm/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r-- arch/arm/kvm/Kconfig | 6
-rw-r--r-- arch/arm/kvm/Makefile | 4
-rw-r--r-- arch/arm/kvm/arch_timer.c | 7
-rw-r--r-- arch/arm/kvm/arm.c | 330
-rw-r--r-- arch/arm/kvm/coproc.c | 28
-rw-r--r-- arch/arm/kvm/coproc.h | 4
-rw-r--r-- arch/arm/kvm/emulate.c | 75
-rw-r--r-- arch/arm/kvm/guest.c | 17
-rw-r--r-- arch/arm/kvm/handle_exit.c | 164
-rw-r--r-- arch/arm/kvm/init.S | 78
-rw-r--r-- arch/arm/kvm/interrupts.S | 13
-rw-r--r-- arch/arm/kvm/mmio.c | 46
-rw-r--r-- arch/arm/kvm/mmu.c | 550
-rw-r--r-- arch/arm/kvm/perf.c | 68
-rw-r--r-- arch/arm/kvm/vgic.c | 2
15 files changed, 806 insertions, 586 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 49dd64e579c..370e1a8af6a 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@ config KVM_ARM_HOST
Provides host support for ARM processors.
config KVM_ARM_MAX_VCPUS
- int "Number maximum supported virtual CPUs per VM"
- depends on KVM_ARM_HOST
- default 4
+ int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
+ default 4 if KVM_ARM_HOST
+ default 0
help
Static number of max supported virtual CPUs per VM.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index fc96ce6f235..53c5ed83d16 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,7 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
obj-y += kvm-arm.o init.o interrupts.o
-obj-y += arm.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
index 6ac938d4629..c55b6089e92 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/arch/arm/kvm/arch_timer.c
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
+#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_vgic.h>
@@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
+ timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
vcpu->arch.timer_cpu.irq->irq,
vcpu->arch.timer_cpu.irq->level);
@@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
cycle_t cval, now;
u64 ns;
- /* Check if the timer is enabled and unmasked first */
- if ((timer->cntv_ctl & 3) != 1)
+ if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+ !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
return;
cval = timer->cntv_cval;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c1fe498983a..ef1703b9587 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,6 +16,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
@@ -30,11 +31,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-#include <asm/unified.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
-#include <asm/cputype.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/virt.h>
@@ -44,14 +43,13 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_psci.h>
-#include <asm/opcodes.h>
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
+static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
static unsigned long hyp_default_vectors;
/* Per-CPU variable containing the currently running vcpu. */
@@ -209,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_MAX_VCPUS;
break;
default:
- r = 0;
+ r = kvm_arch_dev_ioctl_check_extension(ext);
break;
}
return r;
@@ -221,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_arch_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
-{
- return 0;
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
- bool user_alloc)
+ enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- bool user_alloc)
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
{
}
@@ -304,22 +293,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
-int __attribute_const__ kvm_target_cpu(void)
-{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
- case ARM_CPU_PART_CORTEX_A15:
- return KVM_ARM_TARGET_CORTEX_A15;
- default:
- return -EINVAL;
- }
-}
-
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
int ret;
@@ -345,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
- vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
+ vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
/*
* Check whether this vcpu requires the cache to be flushed on
@@ -482,163 +455,6 @@ static void update_vttbr(struct kvm *kvm)
spin_unlock(&kvm_vmid_lock);
}
-static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- /* SVC called from Hyp mode should never get here */
- kvm_debug("SVC called from Hyp mode shouldn't go here\n");
- BUG();
- return -EINVAL; /* Squash warning */
-}
-
-static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
- vcpu->arch.hsr & HSR_HVC_IMM_MASK);
-
- if (kvm_psci_call(vcpu))
- return 1;
-
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- if (kvm_psci_call(vcpu))
- return 1;
-
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- /* The hypervisor should never cause aborts */
- kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
- vcpu->arch.hxfar, vcpu->arch.hsr);
- return -EFAULT;
-}
-
-static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- /* This is either an error in the ws. code or an external abort */
- kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
- vcpu->arch.hxfar, vcpu->arch.hsr);
- return -EFAULT;
-}
-
-typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
-static exit_handle_fn arm_exit_handlers[] = {
- [HSR_EC_WFI] = kvm_handle_wfi,
- [HSR_EC_CP15_32] = kvm_handle_cp15_32,
- [HSR_EC_CP15_64] = kvm_handle_cp15_64,
- [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
- [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
- [HSR_EC_CP14_64] = kvm_handle_cp14_access,
- [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
- [HSR_EC_CP10_ID] = kvm_handle_cp10_id,
- [HSR_EC_SVC_HYP] = handle_svc_hyp,
- [HSR_EC_HVC] = handle_hvc,
- [HSR_EC_SMC] = handle_smc,
- [HSR_EC_IABT] = kvm_handle_guest_abort,
- [HSR_EC_IABT_HYP] = handle_pabt_hyp,
- [HSR_EC_DABT] = kvm_handle_guest_abort,
- [HSR_EC_DABT_HYP] = handle_dabt_hyp,
-};
-
-/*
- * A conditional instruction is allowed to trap, even though it
- * wouldn't be executed. So let's re-implement the hardware, in
- * software!
- */
-static bool kvm_condition_valid(struct kvm_vcpu *vcpu)
-{
- unsigned long cpsr, cond, insn;
-
- /*
- * Exception Code 0 can only happen if we set HCR.TGE to 1, to
- * catch undefined instructions, and then we won't get past
- * the arm_exit_handlers test anyway.
- */
- BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0);
-
- /* Top two bits non-zero? Unconditional. */
- if (vcpu->arch.hsr >> 30)
- return true;
-
- cpsr = *vcpu_cpsr(vcpu);
-
- /* Is condition field valid? */
- if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT)
- cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT;
- else {
- /* This can happen in Thumb mode: examine IT state. */
- unsigned long it;
-
- it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
-
- /* it == 0 => unconditional. */
- if (it == 0)
- return true;
-
- /* The cond for this insn works out as the top 4 bits. */
- cond = (it >> 4);
- }
-
- /* Shift makes it look like an ARM-mode instruction */
- insn = cond << 28;
- return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
-}
-
-/*
- * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
- * proper exit to QEMU.
- */
-static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index)
-{
- unsigned long hsr_ec;
-
- switch (exception_index) {
- case ARM_EXCEPTION_IRQ:
- return 1;
- case ARM_EXCEPTION_UNDEFINED:
- kvm_err("Undefined exception in Hyp mode at: %#08x\n",
- vcpu->arch.hyp_pc);
- BUG();
- panic("KVM: Hypervisor undefined exception!\n");
- case ARM_EXCEPTION_DATA_ABORT:
- case ARM_EXCEPTION_PREF_ABORT:
- case ARM_EXCEPTION_HVC:
- hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
-
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers)
- || !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unkown exception class: %#08lx, "
- "hsr: %#08x\n", hsr_ec,
- (unsigned int)vcpu->arch.hsr);
- BUG();
- }
-
- /*
- * See ARM ARM B1.14.1: "Hyp traps on instructions
- * that fail their condition code check"
- */
- if (!kvm_condition_valid(vcpu)) {
- bool is_wide = vcpu->arch.hsr & HSR_IL;
- kvm_skip_instr(vcpu, is_wide);
- return 1;
- }
-
- return arm_exit_handlers[hsr_ec](vcpu, run);
- default:
- kvm_pr_unimpl("Unsupported exception type: %d",
- exception_index);
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- return 0;
- }
-}
-
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.has_run_once))
@@ -676,6 +492,11 @@ static void vcpu_pause(struct kvm_vcpu *vcpu)
wait_event_interruptible(*wq, !vcpu->arch.pause);
}
+static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.target >= 0;
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -692,8 +513,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int ret;
sigset_t sigsaved;
- /* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */
- if (unlikely(vcpu->arch.target < 0))
+ if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
ret = kvm_vcpu_first_run_init(vcpu);
@@ -815,7 +635,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
return 0;
}
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+ bool line_status)
{
u32 irq = irq_level->irq;
unsigned int irq_type, vcpu_idx, irq_num;
@@ -893,6 +714,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+
+ if (unlikely(!kvm_vcpu_initialized(vcpu)))
+ return -ENOEXEC;
+
if (copy_from_user(&reg, argp, sizeof(reg)))
return -EFAULT;
if (ioctl == KVM_SET_ONE_REG)
@@ -905,6 +730,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_reg_list reg_list;
unsigned n;
+ if (unlikely(!kvm_vcpu_initialized(vcpu)))
+ return -ENOEXEC;
+
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
return -EFAULT;
n = reg_list.n;
@@ -970,40 +798,48 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
-static void cpu_init_hyp_mode(void *vector)
+static void cpu_init_hyp_mode(void *dummy)
{
+ unsigned long long boot_pgd_ptr;
unsigned long long pgd_ptr;
- unsigned long pgd_low, pgd_high;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
/* Switch from the HYP stub to our own HYP init vector */
- __hyp_set_vectors((unsigned long)vector);
+ __hyp_set_vectors(kvm_get_idmap_vector());
+ boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
- pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
- pgd_high = (pgd_ptr >> 32ULL);
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
vector_ptr = (unsigned long)__kvm_hyp_vector;
- /*
- * Call initialization code, and switch to the full blown
- * HYP code. The init code doesn't need to preserve these registers as
- * r1-r3 and r12 are already callee save according to the AAPCS.
- * Note that we slightly misuse the prototype by casing the pgd_low to
- * a void *.
- */
- kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+ __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
}
+static int hyp_init_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ cpu_init_hyp_mode(NULL);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block hyp_init_cpu_nb = {
+ .notifier_call = hyp_init_cpu_notify,
+};
+
/**
* Inits Hyp-mode on all online CPUs
*/
static int init_hyp_mode(void)
{
- phys_addr_t init_phys_addr;
int cpu;
int err = 0;
@@ -1036,24 +872,6 @@ static int init_hyp_mode(void)
}
/*
- * Execute the init code on each CPU.
- *
- * Note: The stack is not mapped yet, so don't do anything else than
- * initializing the hypervisor mode on each CPU using a local stack
- * space for temporary storage.
- */
- init_phys_addr = virt_to_phys(__kvm_hyp_init);
- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, cpu_init_hyp_mode,
- (void *)(long)init_phys_addr, 1);
- }
-
- /*
- * Unmap the identity mapping
- */
- kvm_clear_hyp_idmap();
-
- /*
* Map the Hyp-code called directly from the host
*/
err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
@@ -1076,33 +894,38 @@ static int init_hyp_mode(void)
}
/*
- * Map the host VFP structures
+ * Map the host CPU structures
*/
- kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct);
- if (!kvm_host_vfp_state) {
+ kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+ if (!kvm_host_cpu_state) {
err = -ENOMEM;
- kvm_err("Cannot allocate host VFP state\n");
+ kvm_err("Cannot allocate host CPU state\n");
goto out_free_mappings;
}
for_each_possible_cpu(cpu) {
- struct vfp_hard_struct *vfp;
+ kvm_cpu_context_t *cpu_ctxt;
- vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
- err = create_hyp_mappings(vfp, vfp + 1);
+ cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+ err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
if (err) {
- kvm_err("Cannot map host VFP state: %d\n", err);
- goto out_free_vfp;
+ kvm_err("Cannot map host CPU state: %d\n", err);
+ goto out_free_context;
}
}
/*
+ * Execute the init code on each CPU.
+ */
+ on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+
+ /*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
if (err)
- goto out_free_vfp;
+ goto out_free_context;
#ifdef CONFIG_KVM_ARM_VGIC
vgic_present = true;
@@ -1115,12 +938,19 @@ static int init_hyp_mode(void)
if (err)
goto out_free_mappings;
+#ifndef CONFIG_HOTPLUG_CPU
+ free_boot_hyp_pgd();
+#endif
+
+ kvm_perf_init();
+
kvm_info("Hyp mode initialized successfully\n");
+
return 0;
-out_free_vfp:
- free_percpu(kvm_host_vfp_state);
+out_free_context:
+ free_percpu(kvm_host_cpu_state);
out_free_mappings:
- free_hyp_pmds();
+ free_hyp_pgds();
out_free_stack_pages:
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
@@ -1129,27 +959,42 @@ out_err:
return err;
}
+static void check_kvm_target_cpu(void *ret)
+{
+ *(int *)ret = kvm_target_cpu();
+}
+
/**
* Initialize Hyp-mode and memory mappings on all CPUs.
*/
int kvm_arch_init(void *opaque)
{
int err;
+ int ret, cpu;
if (!is_hyp_mode_available()) {
kvm_err("HYP mode not available\n");
return -ENODEV;
}
- if (kvm_target_cpu() < 0) {
- kvm_err("Target CPU not supported!\n");
- return -ENODEV;
+ for_each_online_cpu(cpu) {
+ smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+ if (ret < 0) {
+ kvm_err("Error, CPU %d not supported!\n", cpu);
+ return -ENODEV;
+ }
}
err = init_hyp_mode();
if (err)
goto out_err;
+ err = register_cpu_notifier(&hyp_init_cpu_nb);
+ if (err) {
+ kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+ goto out_err;
+ }
+
kvm_coproc_table_init();
return 0;
out_err:
@@ -1159,6 +1004,7 @@ out_err:
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
+ kvm_perf_teardown();
}
static int arm_init(void)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7bed7556077..8eea97be1ed 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -76,7 +76,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
const struct coproc_reg *r)
{
- u32 val;
+ unsigned long val;
int cpu;
if (!p->is_write)
@@ -293,12 +293,12 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
if (likely(r->access(vcpu, params, r))) {
/* Skip instruction, since it was emulated */
- kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
/* If access function fails, it should complain. */
} else {
- kvm_err("Unsupported guest CP15 access at: %08x\n",
+ kvm_err("Unsupported guest CP15 access at: %08lx\n",
*vcpu_pc(vcpu));
print_cp_instr(params);
}
@@ -315,14 +315,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params;
- params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
- params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
- params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+ params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
+ params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
params.is_64bit = true;
- params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+ params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
params.Op2 = 0;
- params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+ params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
params.CRn = 0;
return emulate_cp15(vcpu, &params);
@@ -347,14 +347,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params;
- params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
- params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
- params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+ params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
+ params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
params.is_64bit = false;
- params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
- params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
- params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+ params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
+ params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7;
+ params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
params.Rt2 = 0;
return emulate_cp15(vcpu, &params);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 992adfafa2f..b7301d3e479 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
const struct coproc_params *params)
{
- kvm_debug("CP15 write to read-only register at: %08x\n",
+ kvm_debug("CP15 write to read-only register at: %08lx\n",
*vcpu_pc(vcpu));
print_cp_instr(params);
return false;
@@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
const struct coproc_params *params)
{
- kvm_debug("CP15 read to write-only register at: %08x\n",
+ kvm_debug("CP15 read to write-only register at: %08lx\n",
*vcpu_pc(vcpu));
print_cp_instr(params);
return false;
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index d61450ac666..bdede9e7da5 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
+#include <asm/opcodes.h>
#include <trace/events/kvm.h>
#include "trace.h"
@@ -109,10 +110,10 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
* Return a pointer to the register number valid in the current mode of
* the virtual CPU.
*/
-u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
+unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
{
- u32 *reg_array = (u32 *)&vcpu->arch.regs;
- u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK;
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs;
+ unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
case USR_MODE...SVC_MODE:
@@ -141,9 +142,9 @@ u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
{
- u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK;
+ unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
case SVC_MODE:
return &vcpu->arch.regs.KVM_ARM_SVC_spsr;
@@ -160,20 +161,48 @@ u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
}
}
-/**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
- * @vcpu: the vcpu pointer
- * @run: the kvm_run structure pointer
- *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+/*
+ * A conditional instruction is allowed to trap, even though it
+ * wouldn't be executed. So let's re-implement the hardware, in
+ * software!
*/
-int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+bool kvm_condition_valid(struct kvm_vcpu *vcpu)
{
- trace_kvm_wfi(*vcpu_pc(vcpu));
- kvm_vcpu_block(vcpu);
- return 1;
+ unsigned long cpsr, cond, insn;
+
+ /*
+ * Exception Code 0 can only happen if we set HCR.TGE to 1, to
+ * catch undefined instructions, and then we won't get past
+ * the arm_exit_handlers test anyway.
+ */
+ BUG_ON(!kvm_vcpu_trap_get_class(vcpu));
+
+ /* Top two bits non-zero? Unconditional. */
+ if (kvm_vcpu_get_hsr(vcpu) >> 30)
+ return true;
+
+ cpsr = *vcpu_cpsr(vcpu);
+
+ /* Is condition field valid? */
+ if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT)
+ cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT;
+ else {
+ /* This can happen in Thumb mode: examine IT state. */
+ unsigned long it;
+
+ it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+ /* it == 0 => unconditional. */
+ if (it == 0)
+ return true;
+
+ /* The cond for this insn works out as the top 4 bits. */
+ cond = (it >> 4);
+ }
+
+ /* Shift makes it look like an ARM-mode instruction */
+ insn = cond << 28;
+ return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
}
/**
@@ -257,9 +286,9 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
*/
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
- u32 new_lr_value;
- u32 new_spsr_value;
- u32 cpsr = *vcpu_cpsr(vcpu);
+ unsigned long new_lr_value;
+ unsigned long new_spsr_value;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
bool is_thumb = (cpsr & PSR_T_BIT);
u32 vect_offset = 4;
@@ -291,9 +320,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
*/
static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
{
- u32 new_lr_value;
- u32 new_spsr_value;
- u32 cpsr = *vcpu_cpsr(vcpu);
+ unsigned long new_lr_value;
+ unsigned long new_spsr_value;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
bool is_thumb = (cpsr & PSR_T_BIT);
u32 vect_offset;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2339d9609d3..152d0361218 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <asm/cputype.h>
#include <asm/uaccess.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
@@ -180,6 +181,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+int __attribute_const__ kvm_target_cpu(void)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned long part_number = read_cpuid_part_number();
+
+ if (implementor != ARM_CPU_IMP_ARM)
+ return -EINVAL;
+
+ switch (part_number) {
+ case ARM_CPU_PART_CORTEX_A15:
+ return KVM_ARM_TARGET_CORTEX_A15;
+ default:
+ return -EINVAL;
+ }
+}
+
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
new file mode 100644
index 00000000000..3d74a0be47d
--- /dev/null
+++ b/arch/arm/kvm/handle_exit.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+#include "trace.h"
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* SVC called from Hyp mode should never get here */
+ kvm_debug("SVC called from Hyp mode shouldn't go here\n");
+ BUG();
+ return -EINVAL; /* Squash warning */
+}
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+ kvm_vcpu_hvc_get_imm(vcpu));
+
+ if (kvm_psci_call(vcpu))
+ return 1;
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (kvm_psci_call(vcpu))
+ return 1;
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* The hypervisor should never cause aborts */
+ kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
+ kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
+ return -EFAULT;
+}
+
+static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* This is either an error in the ws. code or an external abort */
+ kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
+ kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
+ return -EFAULT;
+}
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu: the vcpu pointer
+ * @run: the kvm_run structure pointer
+ *
+ * Simply calls kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ trace_kvm_wfi(*vcpu_pc(vcpu));
+ kvm_vcpu_block(vcpu);
+ return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+ [HSR_EC_WFI] = kvm_handle_wfi,
+ [HSR_EC_CP15_32] = kvm_handle_cp15_32,
+ [HSR_EC_CP15_64] = kvm_handle_cp15_64,
+ [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
+ [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [HSR_EC_CP14_64] = kvm_handle_cp14_access,
+ [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
+ [HSR_EC_CP10_ID] = kvm_handle_cp10_id,
+ [HSR_EC_SVC_HYP] = handle_svc_hyp,
+ [HSR_EC_HVC] = handle_hvc,
+ [HSR_EC_SMC] = handle_smc,
+ [HSR_EC_IABT] = kvm_handle_guest_abort,
+ [HSR_EC_IABT_HYP] = handle_pabt_hyp,
+ [HSR_EC_DABT] = kvm_handle_guest_abort,
+ [HSR_EC_DABT_HYP] = handle_dabt_hyp,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+ if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+ !arm_exit_handlers[hsr_ec]) {
+ kvm_err("Unknown exception class: hsr: %#08x\n",
+ (unsigned int)kvm_vcpu_get_hsr(vcpu));
+ BUG();
+ }
+
+ return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ exit_handle_fn exit_handler;
+
+ switch (exception_index) {
+ case ARM_EXCEPTION_IRQ:
+ return 1;
+ case ARM_EXCEPTION_UNDEFINED:
+ kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
+ kvm_vcpu_get_hyp_pc(vcpu));
+ BUG();
+ panic("KVM: Hypervisor undefined exception!\n");
+ case ARM_EXCEPTION_DATA_ABORT:
+ case ARM_EXCEPTION_PREF_ABORT:
+ case ARM_EXCEPTION_HVC:
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+ }
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+
+ return exit_handler(vcpu, run);
+ default:
+ kvm_pr_unimpl("Unsupported exception type: %d",
+ exception_index);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return 0;
+ }
+}
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 9f37a79b880..f048338135f 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -21,13 +21,33 @@
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
/********************************************************************
* Hypervisor initialization
* - should be called with:
- * r0,r1 = Hypervisor pgd pointer
- * r2 = top of Hyp stack (kernel VA)
- * r3 = pointer to hyp vectors
+ * r0 = top of Hyp stack (kernel VA)
+ * r1 = pointer to hyp vectors
+ * r2,r3 = Hypervisor pgd pointer
+ *
+ * The init scenario is:
+ * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
+ * runtime stack, runtime vectors
+ * - Enable the MMU with the boot pgd
+ * - Jump to a target into the trampoline page (remember, this is the same
+ * physical page!)
+ * - Now switch to the runtime pgd (same VA, and still the same physical
+ * page!)
+ * - Invalidate TLBs
+ * - Set stack and vectors
+ * - Profit! (or eret, if you only care about the code).
+ *
+ * As we only have four registers available to pass parameters (and we
+ * need six), we split the init in two phases:
+ * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
+ * Provides the basic HYP init, and enables the MMU.
+ * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
+ * Switches to the runtime PGD, and sets the stack and vectors.
*/
.text
@@ -47,22 +67,25 @@ __kvm_hyp_init:
W(b) .
__do_hyp_init:
+ cmp r0, #0 @ We have a SP?
+ bne phase2 @ Yes, second stage init
+
@ Set the HTTBR to point to the hypervisor PGD pointer passed
- mcrr p15, 4, r0, r1, c2
+ mcrr p15, 4, r2, r3, c2
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
mrc p15, 4, r0, c2, c0, 2 @ HTCR
- ldr r12, =HTCR_MASK
- bic r0, r0, r12
+ ldr r2, =HTCR_MASK
+ bic r0, r0, r2
mrc p15, 0, r1, c2, c0, 2 @ TTBCR
and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
orr r0, r0, r1
mcr p15, 4, r0, c2, c0, 2 @ HTCR
mrc p15, 4, r1, c2, c1, 2 @ VTCR
- ldr r12, =VTCR_MASK
- bic r1, r1, r12
+ ldr r2, =VTCR_MASK
+ bic r1, r1, r2
bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
orr r1, r0, r1
orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
@@ -85,24 +108,41 @@ __do_hyp_init:
@ - Memory alignment checks: enabled
@ - MMU: enabled (this code must be run from an identity mapping)
mrc p15, 4, r0, c1, c0, 0 @ HSCR
- ldr r12, =HSCTLR_MASK
- bic r0, r0, r12
+ ldr r2, =HSCTLR_MASK
+ bic r0, r0, r2
mrc p15, 0, r1, c1, c0, 0 @ SCTLR
- ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
- and r1, r1, r12
- ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) )
- THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
- orr r1, r1, r12
+ ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
+ and r1, r1, r2
+ ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
+ THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
+ orr r1, r1, r2
orr r0, r0, r1
isb
mcr p15, 4, r0, c1, c0, 0 @ HSCR
- isb
- @ Set stack pointer and return to the kernel
- mov sp, r2
+ @ End of init phase-1
+ eret
+
+phase2:
+ @ Set stack pointer
+ mov sp, r0
@ Set HVBAR to point to the HYP vectors
- mcr p15, 4, r3, c12, c0, 0 @ HVBAR
+ mcr p15, 4, r1, c12, c0, 0 @ HVBAR
+
+ @ Jump to the trampoline page
+ ldr r0, =TRAMPOLINE_VA
+ adr r1, target
+ bfi r0, r1, #0, #PAGE_SHIFT
+ mov pc, r0
+
+target: @ We're now in the trampoline code, switch page tables
+ mcrr p15, 4, r2, r3, c2
+ isb
+
+ @ Invalidate the old TLBs
+ mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
+ dsb
eret
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 8ca87ab0919..f7793df62f5 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,15 +35,18 @@ __kvm_hyp_code_start:
/********************************************************************
* Flush per-VMID TLBs
*
- * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
*
* We rely on the hardware to broadcast the TLB invalidation to all CPUs
* inside the inner-shareable domain (which is the case for all v7
* implementations). If we come across a non-IS SMP implementation, we'll
* have to use an IPI based mechanism. Until then, we stick to the simple
* hardware assisted version.
+ *
+ * As v7 does not support flushing per IPA, just nuke the whole TLB
+ * instead, ignoring the ipa value.
*/
-ENTRY(__kvm_tlb_flush_vmid)
+ENTRY(__kvm_tlb_flush_vmid_ipa)
push {r2, r3}
add r0, r0, #KVM_VTTBR
@@ -60,7 +63,7 @@ ENTRY(__kvm_tlb_flush_vmid)
pop {r2, r3}
bx lr
-ENDPROC(__kvm_tlb_flush_vmid)
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
/********************************************************************
* Flush TLBs and instruction caches of all CPUs inside the inner-shareable
@@ -235,9 +238,9 @@ ENTRY(kvm_call_hyp)
* instruction is issued since all traps are disabled when running the host
* kernel as per the Hyp-mode initialization at boot time.
*
- * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc
+ * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
* below) when the HVC instruction is called from SVC mode (i.e. a guest or the
- * host kernel) and they cause a trap to the vector page + offset 0xc when HVC
+ * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
* instructions are called from within Hyp-mode.
*
* Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 98a870ff1a5..72a12f2171b 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -33,16 +33,16 @@
*/
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- __u32 *dest;
+ unsigned long *dest;
unsigned int len;
int mask;
if (!run->mmio.is_write) {
dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
- memset(dest, 0, sizeof(int));
+ *dest = 0;
len = run->mmio.len;
- if (len > 4)
+ if (len > sizeof(unsigned long))
return -EINVAL;
memcpy(dest, run->mmio.data, len);
@@ -50,7 +50,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
*((u64 *)run->mmio.data));
- if (vcpu->arch.mmio_decode.sign_extend && len < 4) {
+ if (vcpu->arch.mmio_decode.sign_extend &&
+ len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
*dest = (*dest ^ mask) - mask;
}
@@ -65,40 +66,29 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long rt, len;
bool is_write, sign_extend;
- if ((vcpu->arch.hsr >> 8) & 1) {
+ if (kvm_vcpu_dabt_isextabt(vcpu)) {
/* cache operation on I/O addr, tell guest unsupported */
- kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
return 1;
}
- if ((vcpu->arch.hsr >> 7) & 1) {
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
/* page table accesses IO mem: tell guest to fix its TTBR */
- kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
return 1;
}
- switch ((vcpu->arch.hsr >> 22) & 0x3) {
- case 0:
- len = 1;
- break;
- case 1:
- len = 2;
- break;
- case 2:
- len = 4;
- break;
- default:
- kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
- return -EFAULT;
- }
+ len = kvm_vcpu_dabt_get_as(vcpu);
+ if (unlikely(len < 0))
+ return len;
- is_write = vcpu->arch.hsr & HSR_WNR;
- sign_extend = vcpu->arch.hsr & HSR_SSE;
- rt = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
+ is_write = kvm_vcpu_dabt_iswrite(vcpu);
+ sign_extend = kvm_vcpu_dabt_issext(vcpu);
+ rt = kvm_vcpu_dabt_get_rd(vcpu);
if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
/* IO memory trying to read/write pc */
- kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
return 1;
}
@@ -112,7 +102,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* The MMIO instruction is emulated and should not be re-executed
* in the guest.
*/
- kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 0;
}
@@ -130,7 +120,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
* space do its magic.
*/
- if (vcpu->arch.hsr & HSR_ISV) {
+ if (kvm_vcpu_dabt_isvalid(vcpu)) {
ret = decode_hsr(vcpu, fault_ipa, &mmio);
if (ret)
return ret;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 99e07c7dd74..84ba67b982c 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -20,7 +20,6 @@
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <trace/events/kvm.h>
-#include <asm/idmap.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
@@ -28,28 +27,30 @@
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
-#include <asm/mach/map.h>
-#include <trace/events/kvm.h>
#include "trace.h"
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+static pgd_t *boot_hyp_pgd;
+static pgd_t *hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
-static void kvm_tlb_flush_vmid(struct kvm *kvm)
-{
- kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
-}
+static void *init_bounce_page;
+static unsigned long hyp_idmap_start;
+static unsigned long hyp_idmap_end;
+static phys_addr_t hyp_idmap_vector;
-static void kvm_set_pte(pte_t *pte, pte_t new_pte)
+static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
- pte_val(*pte) = new_pte;
/*
- * flush_pmd_entry just takes a void pointer and cleans the necessary
- * cache entries, so we can reuse the function for ptes.
+ * This function also gets called when dealing with HYP page
+ * tables. As HYP doesn't have an associated struct kvm (and
+ * the HYP page tables are fairly static), we don't do
+ * anything there.
*/
- flush_pmd_entry(pte);
+ if (kvm)
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -84,88 +85,170 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
-static void free_ptes(pmd_t *pmd, unsigned long addr)
+static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
- pte_t *pte;
- unsigned int i;
+ pmd_t *pmd_table = pmd_offset(pud, 0);
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pmd_free(NULL, pmd_table);
+ put_page(virt_to_page(pud));
+}
- for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
- if (!pmd_none(*pmd) && pmd_table(*pmd)) {
- pte = pte_offset_kernel(pmd, addr);
- pte_free_kernel(NULL, pte);
- }
- pmd++;
+static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+{
+ pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pte_free_kernel(NULL, pte_table);
+ put_page(virt_to_page(pmd));
+}
+
+static bool pmd_empty(pmd_t *pmd)
+{
+ struct page *pmd_page = virt_to_page(pmd);
+ return page_count(pmd_page) == 1;
+}
+
+static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+{
+ if (pte_present(*pte)) {
+ kvm_set_pte(pte, __pte(0));
+ put_page(virt_to_page(pte));
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
}
}
-/**
- * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
- *
- * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * only mappings in the kernel memory area, which is above PAGE_OFFSET.
- */
-void free_hyp_pmds(void)
+static bool pte_empty(pte_t *pte)
+{
+ struct page *pte_page = virt_to_page(pte);
+ return page_count(pte_page) == 1;
+}
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+ unsigned long long start, u64 size)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- unsigned long addr;
+ pte_t *pte;
+ unsigned long long addr = start, end = start + size;
+ u64 range;
- mutex_lock(&kvm_hyp_pgd_mutex);
- for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
- pgd = hyp_pgd + pgd_index(addr);
+ while (addr < end) {
+ pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
-
- if (pud_none(*pud))
+ if (pud_none(*pud)) {
+ addr += PUD_SIZE;
continue;
- BUG_ON(pud_bad(*pud));
+ }
pmd = pmd_offset(pud, addr);
- free_ptes(pmd, addr);
- pmd_free(NULL, pmd);
- pud_clear(pud);
+ if (pmd_none(*pmd)) {
+ addr += PMD_SIZE;
+ continue;
+ }
+
+ pte = pte_offset_kernel(pmd, addr);
+ clear_pte_entry(kvm, pte, addr);
+ range = PAGE_SIZE;
+
+ /* If we emptied the pte, walk back up the ladder */
+ if (pte_empty(pte)) {
+ clear_pmd_entry(kvm, pmd, addr);
+ range = PMD_SIZE;
+ if (pmd_empty(pmd)) {
+ clear_pud_entry(kvm, pud, addr);
+ range = PUD_SIZE;
+ }
+ }
+
+ addr += range;
}
+}
+
+/**
+ * free_boot_hyp_pgd - free HYP boot page tables
+ *
+ * Free the HYP boot page tables. The bounce page is also freed.
+ */
+void free_boot_hyp_pgd(void)
+{
+ mutex_lock(&kvm_hyp_pgd_mutex);
+
+ if (boot_hyp_pgd) {
+ unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+ unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+ kfree(boot_hyp_pgd);
+ boot_hyp_pgd = NULL;
+ }
+
+ if (hyp_pgd)
+ unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+
+ kfree(init_bounce_page);
+ init_bounce_page = NULL;
+
mutex_unlock(&kvm_hyp_pgd_mutex);
}
-static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
- unsigned long end)
+/**
+ * free_hyp_pgds - free Hyp-mode page tables
+ *
+ * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
+ * therefore contains either mappings in the kernel memory area (above
+ * PAGE_OFFSET), or device mappings in the vmalloc range (from
+ * VMALLOC_START to VMALLOC_END).
+ *
+ * boot_hyp_pgd should only map two pages for the init code.
+ */
+void free_hyp_pgds(void)
{
- pte_t *pte;
unsigned long addr;
- struct page *page;
- for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
- pte = pte_offset_kernel(pmd, addr);
- BUG_ON(!virt_addr_valid(addr));
- page = virt_to_page(addr);
- kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
+ free_boot_hyp_pgd();
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+
+ if (hyp_pgd) {
+ for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+ unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+ for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+ unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+
+ kfree(hyp_pgd);
+ hyp_pgd = NULL;
}
+
+ mutex_unlock(&kvm_hyp_pgd_mutex);
}
-static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
- unsigned long end,
- unsigned long *pfn_base)
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
{
pte_t *pte;
unsigned long addr;
- for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+ addr = start;
+ do {
pte = pte_offset_kernel(pmd, addr);
- BUG_ON(pfn_valid(*pfn_base));
- kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
- (*pfn_base)++;
- }
+ kvm_set_pte(pte, pfn_pte(pfn, prot));
+ get_page(virt_to_page(pte));
+ kvm_flush_dcache_to_poc(pte, sizeof(*pte));
+ pfn++;
+ } while (addr += PAGE_SIZE, addr != end);
}
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
- unsigned long end, unsigned long *pfn_base)
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
{
pmd_t *pmd;
pte_t *pte;
unsigned long addr, next;
- for (addr = start; addr < end; addr = next) {
+ addr = start;
+ do {
pmd = pmd_offset(pud, addr);
BUG_ON(pmd_sect(*pmd));
@@ -177,42 +260,34 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return -ENOMEM;
}
pmd_populate_kernel(NULL, pmd, pte);
+ get_page(virt_to_page(pmd));
+ kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
}
next = pmd_addr_end(addr, end);
- /*
- * If pfn_base is NULL, we map kernel pages into HYP with the
- * virtual address. Otherwise, this is considered an I/O
- * mapping and we map the physical region starting at
- * *pfn_base to [start, end[.
- */
- if (!pfn_base)
- create_hyp_pte_mappings(pmd, addr, next);
- else
- create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
- }
+ create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
+ pfn += (next - addr) >> PAGE_SHIFT;
+ } while (addr = next, addr != end);
return 0;
}
-static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
+static int __create_hyp_mappings(pgd_t *pgdp,
+ unsigned long start, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
{
- unsigned long start = (unsigned long)from;
- unsigned long end = (unsigned long)to;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr, next;
int err = 0;
- BUG_ON(start > end);
- if (start < PAGE_OFFSET)
- return -EINVAL;
-
mutex_lock(&kvm_hyp_pgd_mutex);
- for (addr = start; addr < end; addr = next) {
- pgd = hyp_pgd + pgd_index(addr);
+ addr = start & PAGE_MASK;
+ end = PAGE_ALIGN(end);
+ do {
+ pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none_or_clear_bad(pud)) {
@@ -223,43 +298,64 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
goto out;
}
pud_populate(NULL, pud, pmd);
+ get_page(virt_to_page(pud));
+ kvm_flush_dcache_to_poc(pud, sizeof(*pud));
}
next = pgd_addr_end(addr, end);
- err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
+ err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
if (err)
goto out;
- }
+ pfn += (next - addr) >> PAGE_SHIFT;
+ } while (addr = next, addr != end);
out:
mutex_unlock(&kvm_hyp_pgd_mutex);
return err;
}
/**
- * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
* @to: The virtual kernel end address of the range (exclusive)
*
- * The same virtual address as the kernel virtual address is also used in
- * Hyp-mode mapping to the same underlying physical pages.
- *
- * Note: Wrapping around zero in the "to" address is not supported.
+ * The same virtual address as the kernel virtual address is also used
+ * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
+ * physical pages.
*/
int create_hyp_mappings(void *from, void *to)
{
- return __create_hyp_mappings(from, to, NULL);
+ unsigned long phys_addr = virt_to_phys(from);
+ unsigned long start = KERN_TO_HYP((unsigned long)from);
+ unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+ /* Check for a valid kernel memory mapping */
+ if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
+ return -EINVAL;
+
+ return __create_hyp_mappings(hyp_pgd, start, end,
+ __phys_to_pfn(phys_addr), PAGE_HYP);
}
/**
- * create_hyp_io_mappings - map a physical IO range in Hyp mode
- * @from: The virtual HYP start address of the range
- * @to: The virtual HYP end address of the range (exclusive)
- * @addr: The physical start address which gets mapped
+ * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
+ * @from: The kernel start VA of the range
+ * @to: The kernel end VA of the range (exclusive)
+ * @phys_addr: The physical start address which gets mapped
+ *
+ * The resulting HYP VA is the same as the kernel VA, modulo
+ * HYP_PAGE_OFFSET.
*/
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
- unsigned long pfn = __phys_to_pfn(addr);
- return __create_hyp_mappings(from, to, &pfn);
+ unsigned long start = KERN_TO_HYP((unsigned long)from);
+ unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+ /* Check for a valid kernel IO mapping */
+ if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
+ return -EINVAL;
+
+ return __create_hyp_mappings(hyp_pgd, start, end,
+ __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
/**
@@ -290,48 +386,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
- clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+ kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
}
-static void clear_pud_entry(pud_t *pud)
-{
- pmd_t *pmd_table = pmd_offset(pud, 0);
- pud_clear(pud);
- pmd_free(NULL, pmd_table);
- put_page(virt_to_page(pud));
-}
-
-static void clear_pmd_entry(pmd_t *pmd)
-{
- pte_t *pte_table = pte_offset_kernel(pmd, 0);
- pmd_clear(pmd);
- pte_free_kernel(NULL, pte_table);
- put_page(virt_to_page(pmd));
-}
-
-static bool pmd_empty(pmd_t *pmd)
-{
- struct page *pmd_page = virt_to_page(pmd);
- return page_count(pmd_page) == 1;
-}
-
-static void clear_pte_entry(pte_t *pte)
-{
- if (pte_present(*pte)) {
- kvm_set_pte(pte, __pte(0));
- put_page(virt_to_page(pte));
- }
-}
-
-static bool pte_empty(pte_t *pte)
-{
- struct page *pte_page = virt_to_page(pte);
- return page_count(pte_page) == 1;
-}
-
/**
* unmap_stage2_range -- Clear stage2 page table entries to unmap a range
* @kvm: The VM pointer
@@ -345,43 +405,7 @@ static bool pte_empty(pte_t *pte)
*/
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- phys_addr_t addr = start, end = start + size;
- u64 range;
-
- while (addr < end) {
- pgd = kvm->arch.pgd + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- addr += PUD_SIZE;
- continue;
- }
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- addr += PMD_SIZE;
- continue;
- }
-
- pte = pte_offset_kernel(pmd, addr);
- clear_pte_entry(pte);
- range = PAGE_SIZE;
-
- /* If we emptied the pte, walk back up the ladder */
- if (pte_empty(pte)) {
- clear_pmd_entry(pmd);
- range = PMD_SIZE;
- if (pmd_empty(pmd)) {
- clear_pud_entry(pud);
- range = PUD_SIZE;
- }
- }
-
- addr += range;
- }
+ unmap_range(kvm, kvm->arch.pgd, start, size);
}
/**
@@ -422,22 +446,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0; /* ignore calls from kvm_set_spte_hva */
pmd = mmu_memory_cache_alloc(cache);
pud_populate(NULL, pud, pmd);
- pmd += pmd_index(addr);
get_page(virt_to_page(pud));
- } else
- pmd = pmd_offset(pud, addr);
+ }
+
+ pmd = pmd_offset(pud, addr);
/* Create 2nd stage page table mapping - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pte = mmu_memory_cache_alloc(cache);
- clean_pte_table(pte);
+ kvm_clean_pte(pte);
pmd_populate_kernel(NULL, pmd, pte);
- pte += pte_index(addr);
get_page(virt_to_page(pmd));
- } else
- pte = pte_offset_kernel(pmd, addr);
+ }
+
+ pte = pte_offset_kernel(pmd, addr);
if (iomap && pte_present(*pte))
return -EFAULT;
@@ -446,7 +470,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
old_pte = *pte;
kvm_set_pte(pte, *new_pte);
if (pte_present(old_pte))
- kvm_tlb_flush_vmid(kvm);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
else
get_page(virt_to_page(pte));
@@ -473,7 +497,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
pfn = __phys_to_pfn(pa);
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
- pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);
+ pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
+ kvm_set_s2pte_writable(&pte);
ret = mmu_topup_memory_cache(&cache, 2, 2);
if (ret)
@@ -492,29 +517,6 @@ out:
return ret;
}
-static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
-{
- /*
- * If we are going to insert an instruction page and the icache is
- * either VIPT or PIPT, there is a potential problem where the host
- * (or another VM) may have used the same page as this guest, and we
- * read incorrect data from the icache. If we're using a PIPT cache,
- * we can invalidate just that page, but if we are using a VIPT cache
- * we need to invalidate the entire icache - damn shame - as written
- * in the ARM ARM (DDI 0406C.b - Page B3-1393).
- *
- * VIVT caches are tagged using both the ASID and the VMID and doesn't
- * need any kind of flushing (DDI 0406C.b - Page B3-1392).
- */
- if (icache_is_pipt()) {
- unsigned long hva = gfn_to_hva(kvm, gfn);
- __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
- } else if (!icache_is_vivt_asid_tagged()) {
- /* any kind of VIPT cache */
- __flush_icache_all();
- }
-}
-
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn, struct kvm_memory_slot *memslot,
unsigned long fault_status)
@@ -526,7 +528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long mmu_seq;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
- write_fault = kvm_is_write_fault(vcpu->arch.hsr);
+ write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
@@ -560,7 +562,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (writable) {
- pte_val(new_pte) |= L_PTE_S2_RDWR;
+ kvm_set_s2pte_writable(&new_pte);
kvm_set_pfn_dirty(pfn);
}
stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
@@ -585,7 +587,6 @@ out_unlock:
*/
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- unsigned long hsr_ec;
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
@@ -593,18 +594,17 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
gfn_t gfn;
int ret, idx;
- hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
- is_iabt = (hsr_ec == HSR_EC_IABT);
- fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+ is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
- trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
- vcpu->arch.hxfar, fault_ipa);
+ trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+ kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
+ fault_status = kvm_vcpu_trap_get_fault(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
- hsr_ec, fault_status);
+ kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu), fault_status);
return -EFAULT;
}
@@ -614,7 +614,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
- kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
ret = 1;
goto out_unlock;
}
@@ -626,8 +626,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- /* Adjust page offset */
- fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK;
+ /*
+ * The IPA is reported as [MAX:12], so we need to
+ * complement it with the bottom 12 bits from the
+ * faulting VA. This is always 12 bits, irrespective
+ * of the page size.
+ */
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
ret = io_mem_abort(vcpu, run, fault_ipa);
goto out_unlock;
}
@@ -682,7 +687,6 @@ static void handle_hva_to_gpa(struct kvm *kvm,
static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
unmap_stage2_range(kvm, gpa, PAGE_SIZE);
- kvm_tlb_flush_vmid(kvm);
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -736,47 +740,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
phys_addr_t kvm_mmu_get_httbr(void)
{
- VM_BUG_ON(!virt_addr_valid(hyp_pgd));
return virt_to_phys(hyp_pgd);
}
+phys_addr_t kvm_mmu_get_boot_httbr(void)
+{
+ return virt_to_phys(boot_hyp_pgd);
+}
+
+phys_addr_t kvm_get_idmap_vector(void)
+{
+ return hyp_idmap_vector;
+}
+
int kvm_mmu_init(void)
{
- if (!hyp_pgd) {
+ int err;
+
+ hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
+ hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
+ hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+
+ if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
+ /*
+ * Our init code is crossing a page boundary. Allocate
+ * a bounce page, copy the code over and use that.
+ */
+ size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
+ phys_addr_t phys_base;
+
+ init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!init_bounce_page) {
+ kvm_err("Couldn't allocate HYP init bounce page\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(init_bounce_page, __hyp_idmap_text_start, len);
+ /*
+ * Warning: the code we just copied to the bounce page
+ * must be flushed to the point of coherency.
+ * Otherwise, the data may be sitting in L2, and HYP
+ * mode won't be able to observe it as it runs with
+ * caches off at that point.
+ */
+ kvm_flush_dcache_to_poc(init_bounce_page, len);
+
+ phys_base = virt_to_phys(init_bounce_page);
+ hyp_idmap_vector += phys_base - hyp_idmap_start;
+ hyp_idmap_start = phys_base;
+ hyp_idmap_end = phys_base + len;
+
+ kvm_info("Using HYP init bounce page @%lx\n",
+ (unsigned long)phys_base);
+ }
+
+ hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
- return 0;
-}
+ /* Create the idmap in the boot page tables */
+ err = __create_hyp_mappings(boot_hyp_pgd,
+ hyp_idmap_start, hyp_idmap_end,
+ __phys_to_pfn(hyp_idmap_start),
+ PAGE_HYP);
-/**
- * kvm_clear_idmap - remove all idmaps from the hyp pgd
- *
- * Free the underlying pmds for all pgds in range and clear the pgds (but
- * don't free them) afterwards.
- */
-void kvm_clear_hyp_idmap(void)
-{
- unsigned long addr, end;
- unsigned long next;
- pgd_t *pgd = hyp_pgd;
- pud_t *pud;
- pmd_t *pmd;
+ if (err) {
+ kvm_err("Failed to idmap %lx-%lx\n",
+ hyp_idmap_start, hyp_idmap_end);
+ goto out;
+ }
- addr = virt_to_phys(__hyp_idmap_text_start);
- end = virt_to_phys(__hyp_idmap_text_end);
+ /* Map the very same page at the trampoline VA */
+ err = __create_hyp_mappings(boot_hyp_pgd,
+ TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+ __phys_to_pfn(hyp_idmap_start),
+ PAGE_HYP);
+ if (err) {
+ kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
+ TRAMPOLINE_VA);
+ goto out;
+ }
- pgd += pgd_index(addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
+ /* Map the same page again into the runtime page tables */
+ err = __create_hyp_mappings(hyp_pgd,
+ TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+ __phys_to_pfn(hyp_idmap_start),
+ PAGE_HYP);
+ if (err) {
+ kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
+ TRAMPOLINE_VA);
+ goto out;
+ }
- pud_clear(pud);
- clean_pmd_entry(pmd);
- pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
- } while (pgd++, addr = next, addr < end);
+ return 0;
+out:
+ free_hyp_pgds();
+ return err;
}
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c
new file mode 100644
index 00000000000..1a3849da0b4
--- /dev/null
+++ b/arch/arm/kvm/perf.c
@@ -0,0 +1,68 @@
+/*
+ * Based on the x86 implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+static int kvm_is_in_guest(void)
+{
+ return kvm_arm_get_running_vcpu() != NULL;
+}
+
+static int kvm_is_user_mode(void)
+{
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_arm_get_running_vcpu();
+
+ if (vcpu)
+ return !vcpu_mode_priv(vcpu);
+
+ return 0;
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_arm_get_running_vcpu();
+
+ if (vcpu)
+ return *vcpu_pc(vcpu);
+
+ return 0;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+ .is_in_guest = kvm_is_in_guest,
+ .is_user_mode = kvm_is_user_mode,
+ .get_guest_ip = kvm_get_guest_ip,
+};
+
+int kvm_perf_init(void)
+{
+ return perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+
+int kvm_perf_teardown(void)
+{
+ return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+}
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 0e4cfe123b3..17c5ac7d10e 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1477,7 +1477,7 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
if (addr & ~KVM_PHYS_MASK)
return -E2BIG;
- if (addr & ~PAGE_MASK)
+ if (addr & (SZ_4K - 1))
return -EINVAL;
mutex_lock(&kvm->lock);