diff options
author | Mark Brown <broonie@kernel.org> | 2014-10-11 00:38:34 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2014-10-11 11:28:22 +0100 |
commit | aea5b31b7df7526dcb07000a50d35f3df3e6c283 (patch) | |
tree | c5f02bbdfac69a0165a3f366dc8f1a85ed459d3f /arch | |
parent | 1ab056a5fb0d123436b862806734c6a9dc4651a4 (diff) | |
parent | 2606d2448703e8995ca39a59d8a1106a1e0f034a (diff) |
Merge remote-tracking branch 'lsk/v3.10/topic/kvm' into linux-linaro-lsk
Conflicts:
arch/arm/kvm/arm.c
arch/arm64/Makefile
arch/arm64/kernel/asm-offsets.c
virt/kvm/kvm_main.c
Diffstat (limited to 'arch')
110 files changed, 8359 insertions, 3059 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index e780afbcee54..f0963bb79935 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -30,8 +30,8 @@ * Endian independent macros for shifting bytes within registers. */ #ifndef __ARMEB__ -#define pull lsr -#define push lsl +#define lspull lsr +#define lspush lsl #define get_byte_0 lsl #0 #define get_byte_1 lsr #8 #define get_byte_2 lsr #16 @@ -41,8 +41,8 @@ #define put_byte_2 lsl #16 #define put_byte_3 lsl #24 #else -#define pull lsl -#define push lsr +#define lspull lsl +#define lspush lsr #define get_byte_0 lsr #24 #define get_byte_1 lsr #16 #define get_byte_2 lsr #8 @@ -219,9 +219,9 @@ #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 .ifeqs "\mode","arm" - ALT_SMP(dmb) + ALT_SMP(dmb ish) .else - ALT_SMP(W(dmb)) + ALT_SMP(W(dmb) ish) .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 8dcd9c702d90..60f15e274e6d 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -14,27 +14,27 @@ #endif #if __LINUX_ARM_ARCH__ >= 7 -#define isb() __asm__ __volatile__ ("isb" : : : "memory") -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") -#define dmb() __asm__ __volatile__ ("dmb" : : : "memory") +#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory") +#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory") +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ +#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") #elif defined(CONFIG_CPU_FA526) -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #else -#define isb() __asm__ __volatile__ ("" : : : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define isb(x) __asm__ __volatile__ ("" : : : "memory") +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif #ifdef CONFIG_ARCH_HAS_BARRIERS @@ -42,7 +42,7 @@ #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() -#define wmb() mb() +#define wmb() do { dsb(st); outer_sync(); } while (0) #else #define mb() barrier() #define rmb() barrier() @@ -54,9 +54,9 @@ #define smp_rmb() barrier() #define smp_wmb() barrier() #else -#define smp_mb() dmb() -#define smp_rmb() dmb() -#define smp_wmb() dmb() +#define smp_mb() dmb(ish) +#define smp_rmb() smp_mb() +#define smp_wmb() dmb(ishst) #endif #define read_barrier_depends() do { } while(0) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index dba62cb1ad08..3392fe2d3174 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -43,15 +43,18 @@ #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_INTEL 0x69 -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 +/* ARM implemented processors */ +#define ARM_CPU_PART_ARM1136 0x4100b360 +#define ARM_CPU_PART_ARM1156 0x4100b560 +#define ARM_CPU_PART_ARM1176 0x4100b760 +#define ARM_CPU_PART_ARM11MPCORE 0x4100b020 +#define ARM_CPU_PART_CORTEX_A8 0x4100c080 +#define ARM_CPU_PART_CORTEX_A9 0x4100c090 +#define ARM_CPU_PART_CORTEX_A5 0x4100c050 +#define ARM_CPU_PART_CORTEX_A7 0x4100c070 +#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 +#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 +#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } -static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +/* + * The CPU part number is meaningless without referring to the CPU + * implementer: implementers are free to define their own part numbers + * which are permitted to clash with other implementer part numbers. + */ +static inline unsigned int __attribute_const__ read_cpuid_part(void) +{ + return read_cpuid_id() & 0xff00fff0; +} + +static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) { return read_cpuid_id() & 0xFFF0; } static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) { - return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; + return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h deleted file mode 100644 index 68cb9e1dfb81..000000000000 --- a/arch/arm/include/asm/kvm_arch_timer.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_ARCH_TIMER_H -#define __ASM_ARM_KVM_ARCH_TIMER_H - -#include <linux/clocksource.h> -#include <linux/hrtimer.h> -#include <linux/workqueue.h> - -struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER - /* Is the timer enabled */ - bool enabled; - - /* Virtual offset */ - cycle_t cntvoff; -#endif -}; - -struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER - /* Registers: control register, timer value */ - u32 cntv_ctl; /* Saved/restored */ - cycle_t cntv_cval; /* Saved/restored */ - - /* - * Anything that is not used directly from assembly code goes - * here. - */ - - /* Background timer used when the guest is not running */ - struct hrtimer timer; - - /* Work queued with the above timer expires */ - struct work_struct expired; - - /* Background timer active */ - bool armed; - - /* Timer IRQ */ - const struct kvm_irq_level *irq; -#endif -}; - -#ifdef CONFIG_KVM_ARM_TIMER -int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} -#endif - -#endif diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 124623e5ef14..816db0bf2dd8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -55,8 +55,10 @@ * The bits we set in HCR: * TAC: Trap ACTLR * TSC: Trap SMC + * TVM: Trap VM ops (until MMU and caches are on) * TSW: Trap cache operations by set/way * TWI: Trap WFI + * TWE: Trap WFE * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain * FB: Force broadcast of all maintainance operations @@ -67,8 +69,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP) -#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) + HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP) /* System Control Register (SCTLR) bits */ #define SCTLR_TE (1 << 30) @@ -95,12 +96,12 @@ #define TTBCR_IRGN1 (3 << 24) #define TTBCR_EPD1 (1 << 23) #define TTBCR_A1 (1 << 22) -#define TTBCR_T1SZ (3 << 16) +#define TTBCR_T1SZ (7 << 16) #define TTBCR_SH0 (3 << 12) #define TTBCR_ORGN0 (3 << 10) #define TTBCR_IRGN0 (3 << 8) #define TTBCR_EPD0 (1 << 7) -#define TTBCR_T0SZ 3 +#define TTBCR_T0SZ (7 << 0) #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) /* Hyp System Trap Register */ @@ -135,7 +136,6 @@ #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) -#define S2_PGD_SIZE (1 << S2_PGD_ORDER) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -209,6 +209,8 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_WFI_IS_WFE (1U << 0) + #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) #define HSR_DABT_S1PTW (1U << 7) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 4bb08e3e52bc..3a67bec72d0c 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -39,7 +39,7 @@ #define c6_IFAR 17 /* Instruction Fault Address Register */ #define c7_PAR 18 /* Physical Address Register */ #define c7_PAR_high 19 /* PAR top 32 bits */ -#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ #define c10_PRRR 21 /* Primary Region Remap Register */ #define c10_NMRR 22 /* Normal Memory Remap Register */ #define c12_VBAR 23 /* Vector Base Address Register */ @@ -48,7 +48,9 @@ #define c13_TID_URO 26 /* Thread ID, User R/O */ #define c13_TID_PRIV 27 /* Thread ID, Privileged */ #define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ -#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ +#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */ +#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */ +#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 @@ -59,6 +61,24 @@ #define ARM_EXCEPTION_FIQ 6 #define ARM_EXCEPTION_HVC 7 +/* + * The rr_lo_hi macro swaps a pair of registers depending on + * current endianness. It is used in conjunction with ldrd and strd + * instructions that load/store a 64-bit value from/to memory to/from + * a pair of registers which are used with the mrrc and mcrr instructions. + * If used with the ldrd/strd instructions, the a1 parameter is the first + * source/destination register and the a2 parameter is the second + * source/destination register. Note that the ldrd/strd instructions + * already swap the bytes within the words correctly according to the + * endianness setting, but the order of the registers need to be effectively + * swapped when used with the mrrc/mcrr instructions. + */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define rr_lo_hi(a1, a2) a2, a1 +#else +#define rr_lo_hi(a1, a2) a1, a2 +#endif + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; @@ -74,8 +94,6 @@ extern char __kvm_hyp_vector[]; extern char __kvm_hyp_code_start[]; extern char __kvm_hyp_code_end[]; -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 82b4babead2c..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) return cpsr_mode > USR_MODE;; } -static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) -{ - return reg == 15; -} - static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hsr; @@ -154,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) { + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) +{ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } @@ -162,4 +162,69 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cp15[c0_MPIDR]; +} + +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_E_BIT; +} + +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + default: + return be32_to_cpu(data); + } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + default: + return le32_to_cpu(data); + } + } +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + default: + return cpu_to_be32(data); + } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + default: + return cpu_to_le32(data); + } + } +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..46e5d4da1989 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,30 +19,31 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include <linux/types.h> +#include <linux/kvm_types.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/fpstate.h> -#include <asm/kvm_arch_timer.h> +#include <kvm/arm_arch_timer.h> +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 1 - -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) +#define KVM_VCPU_MAX_FEATURES 2 -#include <asm/kvm_vgic.h> +#include <kvm/arm_vgic.h> -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); @@ -101,6 +102,12 @@ struct kvm_vcpu_arch { /* The CPU type we expose to the VM */ u32 midr; + /* HYP trapping configuration */ + u32 hcr; + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -128,9 +135,6 @@ struct kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u32 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -146,19 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -183,15 +185,14 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, - unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { @@ -221,7 +222,18 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) return 0; } +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + BUG_ON(vgic->type != VGIC_V2); +} + int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 472ac7091003..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -62,9 +62,15 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) +{ + *pmd = new_pmd; + flush_pmd_entry(pmd); +} + static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { - pte_val(*pte) = new_pte; + *pte = new_pte; /* * flush_pmd_entry just takes a void pointer and cleans the necessary * cache entries, so we can reuse the function for ptes. @@ -72,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) flush_pmd_entry(pte); } -static inline bool kvm_is_write_fault(unsigned long hsr) -{ - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; - if (hsr_ec == HSR_EC_IABT) - return false; - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) - return false; - else - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) { clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); @@ -103,10 +98,51 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= L_PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= L_PMD_S2_RDWR; +} + +/* Open coded p*d_addr_end that can deal with 64bit addresses */ +#define kvm_pgd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + +#define kvm_pud_addr_end(addr,end) (end) + +#define kvm_pmd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; +} + +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) { + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + /* * If we are going to insert an instruction page and the icache is * either VIPT or PIPT, there is a potential problem where the host @@ -120,15 +156,16 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) * need any kind of flushing (DDI 0406C.b - Page B3-1392). */ if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + __cpuc_coherent_user_range(hva, hva + size); } else if (!icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); } } -#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) + +void stage2_flush_vm(struct kvm *kvm); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 9a83d98bf170..6bda945d31fa 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM_KVM_PSCI_H__ #define __ARM_KVM_PSCI_H__ -bool kvm_psci_call(struct kvm_vcpu *vcpu); +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h deleted file mode 100644 index 343744e4809c..000000000000 --- a/arch/arm/include/asm/kvm_vgic.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_VGIC_H -#define __ASM_ARM_KVM_VGIC_H - -#include <linux/kernel.h> -#include <linux/kvm.h> -#include <linux/irqreturn.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/irqchip/arm-gic.h> - -#define VGIC_NR_IRQS 128 -#define VGIC_NR_SGIS 16 -#define VGIC_NR_PPIS 16 -#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) - -/* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) -#error Invalid number of CPU interfaces -#endif - -#if (VGIC_NR_IRQS & 31) -#error "VGIC_NR_IRQS must be a multiple of 32" -#endif - -#if (VGIC_NR_IRQS > 1024) -#error "VGIC_NR_IRQS must be <= 1024" -#endif - -/* - * The GIC distributor registers describing interrupts have two parts: - * - 32 per-CPU interrupts (SGI + PPI) - * - a bunch of shared interrupts (SPI) - */ -struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; -}; - -struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; -}; - -struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC - spinlock_t lock; - bool ready; - - /* Virtual control interface mapping */ - void __iomem *vctrl_base; - - /* Distributor and vcpu interface mapping in the guest */ - phys_addr_t vgic_dist_base; - phys_addr_t vgic_cpu_base; - - /* Distributor enabled */ - u32 enabled; - - /* Interrupt enabled (one bit per IRQ) */ - struct vgic_bitmap irq_enabled; - - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; - - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; - - /* Interrupt priority. Not used yet. */ - struct vgic_bytemap irq_priority; - - /* Level/edge triggered */ - struct vgic_bitmap irq_cfg; - - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; - - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; - - /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; -#endif -}; - -struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC - /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; - - /* Pending interrupts on this VCPU */ - DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); - - /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); - - /* Number of list registers on this CPU */ - int nr_lr; - - /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; -#endif -}; - -#define LR_EMPTY 0xff - -struct kvm; -struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; - -#ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); -int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); -int kvm_vgic_create(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level); -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); - -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) - -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_init(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} -#endif - -#endif diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab313c5..21b458e6b0b8 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -157,6 +157,7 @@ */ #define __PV_BITS_31_24 0x81000000 +extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x); extern unsigned long __pv_phys_offset; #define PHYS_OFFSET __pv_phys_offset @@ -233,6 +234,21 @@ static inline void *phys_to_virt(phys_addr_t x) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) /* + * These are for systems that have a hardware interconnect supported alias of + * physical memory for idmap purposes. Most cases should leave these + * untouched. + */ +static inline phys_addr_t __virt_to_idmap(unsigned long x) +{ + if (arch_virt_to_idmap) + return arch_virt_to_idmap(x); + else + return __virt_to_phys(x); +} + +#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x)) + +/* * Virtual <-> DMA view memory address translations * Again, these are *only* valid on the kernel direct mapped RAM * memory. Use of these is *deprecated* (and that doesn't mean diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 54733e5ef7a1..c5d94b31bde4 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -126,6 +126,8 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Hyp-mode PL2 PTE definitions for LPAE. */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 76a52aed14e8..b1b7a4907489 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 18d169373612..1a292d8be988 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -11,7 +11,7 @@ static inline bool scu_a9_has_base(void) { - return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; } static inline unsigned long scu_a9_get_base(void) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c1ee007523d7..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -20,10 +20,12 @@ #define __ARM_KVM_H__ #include <linux/types.h> +#include <linux/psci.h> #include <asm/ptrace.h> #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -63,7 +65,8 @@ struct kvm_regs { /* Supported Processor Types */ #define KVM_ARM_TARGET_CORTEX_A15 0 -#define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_TARGET_CORTEX_A7 1 +#define KVM_ARM_NUM_TARGETS 2 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 @@ -82,6 +85,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -118,6 +122,26 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +#define ARM_CP15_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) + +#define __ARM_CP15_REG(op1,crn,crm,op2) \ + (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \ + ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \ + ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \ + ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \ + ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2)) + +#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32) + +#define __ARM_CP15_REG64(op1,crm) \ + (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) +#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) + +#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) + /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) @@ -142,6 +166,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 @@ -172,9 +205,9 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..776d9186e9c1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -168,6 +168,7 @@ int main(void) DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); @@ -175,13 +176,13 @@ int main(void) DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); #ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); #ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 0b48a38e3cf4..e0665b871f5b 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -234,49 +234,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); + switch (read_cpuid_part()) { /* ARM Ltd CPUs. */ - if (implementor == ARM_CPU_IMP_ARM) { - switch (part_number) { - case ARM_CPU_PART_ARM1136: - case ARM_CPU_PART_ARM1156: - case ARM_CPU_PART_ARM1176: - ret = armv6pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A5: - ret = armv7_a5_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A15: - ret = armv7_a15_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A7: - ret = armv7_a7_pmu_init(pmu); - break; - } - /* Intel CPUs [xscale]. */ - } else if (implementor == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: + ret = armv6pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + + default: + if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } } + break; } /* assume PMU support all the CPUs in this case */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index dc2843f337af..d6e3d1ca4ddf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -91,8 +91,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); + secondary_data.pgdir = virt_to_idmap(idmap_pgd); + secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir); __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 370e1a8af6ac..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE @@ -41,9 +42,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST - default 4 if KVM_ARM_HOST - default 0 + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 help Static number of max supported virtual CPUs per VM. @@ -67,6 +68,4 @@ config KVM_ARM_TIMER ---help--- Adds support for the Architected Timers in virtual machines -source drivers/virtio/Kconfig - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..f7057ed045b6 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,10 +14,12 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o -obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c deleted file mode 100644 index c55b6089e923..000000000000 --- a/arch/arm/kvm/arch_timer.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/of_irq.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> - -#include <clocksource/arm_arch_timer.h> -#include <asm/arch_timer.h> - -#include <asm/kvm_vgic.h> -#include <asm/kvm_arch_timer.h> - -static struct timecounter *timecounter; -static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; - -static cycle_t kvm_phys_timer_read(void) -{ - return timecounter->cc->read(timecounter->cc); -} - -static bool timer_is_armed(struct arch_timer_cpu *timer) -{ - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), - HRTIMER_MODE_ABS); -} - -static void timer_disarm(struct arch_timer_cpu *timer) -{ - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } -} - -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); -} - -static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) -{ - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; - - /* - * We disable the timer in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. - */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); - return IRQ_HANDLED; -} - -static void kvm_timer_inject_irq_work(struct work_struct *work) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); -} - -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) -{ - struct arch_timer_cpu *timer; - timer = container_of(hrt, struct arch_timer_cpu, timer); - queue_work(wqueue, &timer->expired); - return HRTIMER_NORESTART; -} - -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); -} - -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. - */ -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; - - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - BUG_ON(timer_is_armed(timer)); - - if (cval <= now) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. - */ - kvm_timer_inject_irq(vcpu); - return; - } - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); - timer_arm(timer, ns); -} - -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; -} - -static void kvm_timer_init_interrupt(void *info) -{ - enable_percpu_irq(timer_irq.irq, 0); -} - - -static int kvm_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - kvm_timer_init_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block kvm_timer_cpu_nb = { - .notifier_call = kvm_timer_cpu_notify, -}; - -static const struct of_device_id arch_timer_of_match[] = { - { .compatible = "arm,armv7-timer", }, - {}, -}; - -int kvm_timer_hyp_init(void) -{ - struct device_node *np; - unsigned int ppi; - int err; - - timecounter = arch_timer_get_timecounter(); - if (!timecounter) - return -ENODEV; - - np = of_find_matching_node(NULL, arch_timer_of_match); - if (!np) { - kvm_err("kvm_arch_timer: can't find DT node\n"); - return -ENODEV; - } - - ppi = irq_of_parse_and_map(np, 2); - if (!ppi) { - kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); - err = -EINVAL; - goto out; - } - - err = request_percpu_irq(ppi, kvm_arch_timer_handler, - "kvm guest timer", kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", - ppi, err); - goto out; - } - - timer_irq.irq = ppi; - - err = register_cpu_notifier(&kvm_timer_cpu_nb); - if (err) { - kvm_err("Cannot register timer CPU notifier\n"); - goto out_free; - } - - wqueue = create_singlethread_workqueue("kvm_arch_timer"); - if (!wqueue) { - err = -ENOMEM; - goto out_free; - } - - kvm_info("%s IRQ%d\n", np->name, ppi); - on_each_cpu(kvm_timer_init_interrupt, NULL, 1); - - goto out; -out_free: - free_percpu_irq(ppi, kvm_get_running_vcpus()); -out: - of_node_put(np); - return err; -} - -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer_disarm(timer); -} - -int kvm_timer_init(struct kvm *kvm) -{ - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); - kvm->arch.timer.enabled = 1; - } - - return 0; -} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 1d55afe7fd4b..d0c8ee654bbf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) /** * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. */ -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) { return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } @@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} /** * kvm_arch_init_vm - initializes a VM data structure @@ -138,6 +127,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; @@ -153,15 +144,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) -{ - return 0; -} /** * kvm_arch_destroy_vm - destroy the VM data structure @@ -179,20 +161,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + + kvm_vgic_destroy(kvm); } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { case KVM_CAP_IRQCHIP: r = vgic_present; break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -220,29 +207,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - return 0; -} - -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { @@ -281,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); + kvm_vgic_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -296,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - int ret; - /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; - /* Set up VGIC */ - ret = kvm_vgic_vcpu_init(vcpu); - if (ret) - return ret; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; @@ -335,6 +289,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + kvm_arm_set_running_vcpu(NULL); } @@ -449,15 +410,17 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; - kvm->arch.vttbr |= vmid; + kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); } static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; @@ -467,22 +430,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Initialize the VGIC before running a vcpu the first time on * this VM. */ - if (irqchip_in_kernel(vcpu->kvm) && - unlikely(!vgic_initialized(vcpu->kvm))) { - int ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); if (ret) return ret; } - /* - * Handle the "start in power-off" case by calling into the - * PSCI code. - */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); - } - return 0; } @@ -696,6 +649,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. + */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -709,8 +680,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { @@ -768,7 +738,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; } @@ -794,6 +764,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } @@ -801,8 +784,8 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - unsigned long long boot_pgd_ptr; - unsigned long long pgd_ptr; + phys_addr_t boot_pgd_ptr; + phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -810,8 +793,8 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); - pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + pgd_ptr = kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; @@ -825,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self, switch (action) { case CPU_STARTING: case CPU_STARTING_FROZEN: - cpu_init_hyp_mode(NULL); + if (__hyp_get_vectors() == hyp_default_vectors) + cpu_init_hyp_mode(NULL); break; } @@ -841,7 +825,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT && + __hyp_get_vectors() == hyp_default_vectors) { cpu_init_hyp_mode(NULL); return NOTIFY_OK; } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4dd..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -23,6 +23,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> #include <asm/cacheflush.h> #include <asm/cputype.h> #include <trace/events/kvm.h> @@ -43,6 +44,31 @@ static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ #define CSSELR_MAX 12 +/* + * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some + * of cp15 registers can be viewed either as couple of two u32 registers + * or one u64 register. Current u64 register encoding is that least + * significant u32 word is followed by most significant u32 word. + */ +static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, + const struct coproc_reg *r, + u64 val) +{ + vcpu->arch.cp15[r->reg] = val & 0xffffffff; + vcpu->arch.cp15[r->reg + 1] = val >> 32; +} + +static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, + const struct coproc_reg *r) +{ + u64 val; + + val = vcpu->arch.cp15[r->reg + 1]; + val = val << 32; + val = val | vcpu->arch.cp15[r->reg]; + return val; +} + int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run) { kvm_inject_undefined(vcpu); @@ -71,6 +97,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, @@ -113,6 +231,44 @@ done: } /* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + BUG_ON(!p->is_write); + + vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); + if (p->is_64bit) + vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + + return true; +} + +/* + * SCTLR accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + * + * Used by the cpu-specific code. + */ +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ + vcpu->arch.hcr &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } + + return true; +} + +/* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was * NAKed, so it will read the PMCR anyway. @@ -153,37 +309,52 @@ static bool pm_fake(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, - /* TTBR0/TTBR1: swapped by interrupt.S. */ - { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, - { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, - /* TTBCR: swapped by interrupt.S. */ + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + + /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */ + { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c2_TTBR1 }, { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c2_TTBCR, 0x00000000 }, + access_vm_reg, reset_val, c2_TTBCR, 0x00000000 }, + { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 }, + /* DACR: swapped by interrupt.S. */ { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c3_DACR }, + access_vm_reg, reset_unknown, c3_DACR }, /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */ { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_DFSR }, + access_vm_reg, reset_unknown, c5_DFSR }, { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_IFSR }, + access_vm_reg, reset_unknown, c5_IFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_ADFSR }, + access_vm_reg, reset_unknown, c5_ADFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_AIFSR }, + access_vm_reg, reset_unknown, c5_AIFSR }, /* DFAR/IFAR: swapped by interrupt.S. */ { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c6_DFAR }, + access_vm_reg, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_unknown, c6_IFAR }, + access_vm_reg, reset_unknown, c6_IFAR }, /* PAR swapped by interrupt.S */ { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, @@ -195,6 +366,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + + /* * Dummy performance monitor implementation. */ { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, @@ -213,9 +391,15 @@ static const struct coproc_reg cp15_regs[] = { /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */ { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c10_PRRR}, + access_vm_reg, reset_unknown, c10_PRRR}, { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c10_NMRR}, + access_vm_reg, reset_unknown, c10_NMRR}, + + /* AMAIR0/AMAIR1: swapped by interrupt.S. */ + { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c10_AMAIR0}, + { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c10_AMAIR1}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, @@ -223,7 +407,7 @@ static const struct coproc_reg cp15_regs[] = { /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */ { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_val, c13_CID, 0x00000000 }, + access_vm_reg, reset_val, c13_CID, 0x00000000 }, { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c13_TID_URW }, { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32, @@ -234,6 +418,9 @@ static const struct coproc_reg cp15_regs[] = { /* CNTKCTL: swapped by interrupt.S. */ { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -241,6 +428,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } @@ -323,7 +516,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = true; @@ -331,7 +524,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf; params.Op2 = 0; params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; - params.CRn = 0; + params.CRm = 0; return emulate_cp15(vcpu, ¶ms); } @@ -514,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; +/* + * Reads a register value from a userspace address to a kernel + * variable. Make sure that register size matches sizeof(*__val). + */ static int reg_from_user(void *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } +/* + * Writes a register value to a userspace address from a kernel variable. + * Make sure that register size matches sizeof(*__val). + */ static int reg_to_user(void __user *uaddr, const void *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; @@ -534,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) { struct coproc_params params; const struct coproc_reg *r; + int ret; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -542,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - return reg_to_user(uaddr, &r->val, id); + ret = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val = r->val; + + ret = reg_to_user(uaddr, &val, id); + } else if (KVM_REG_SIZE(id) == 8) { + ret = reg_to_user(uaddr, &r->val, id); + } + return ret; } static int set_invariant_cp15(u64 id, void __user *uaddr) @@ -550,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) struct coproc_params params; const struct coproc_reg *r; int err; - u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + u64 val; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -558,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - err = reg_from_user(&val, uaddr, id); + err = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val32; + + err = reg_from_user(&val32, uaddr, id); + if (!err) + val = val32; + } else if (KVM_REG_SIZE(id) == 8) { + err = reg_from_user(&val, uaddr, id); + } if (err) return err; @@ -574,7 +791,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); @@ -836,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_get(reg->id, uaddr); @@ -847,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return get_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit. */ - return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + val = vcpu_cp15_reg64_get(vcpu, r); + ret = reg_to_user(uaddr, &val, reg->id); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + } + + return ret; } int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_set(reg->id, uaddr); @@ -866,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return set_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit */ - return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + ret = reg_from_user(&val, uaddr, reg->id); + if (!ret) + vcpu_cp15_reg64_set(vcpu, r, val); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + } + + return ret; } static unsigned int num_demux_regs(void) diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 0461d5c8d3de..1a44bbe39643 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p) { /* Look, we even formatted it for you to paste into the table! */ if (p->is_64bit) { - kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n", - p->CRm, p->Op1, p->is_write ? "write" : "read"); + kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n", + p->CRn, p->Op1, p->is_write ? "write" : "read"); } else { kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32," " func_%s },\n", @@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1, return -1; if (i1->CRn != i2->CRn) return i1->CRn - i2->CRn; - if (i1->is_64 != i2->is_64) - return i2->is_64 - i1->is_64; if (i1->CRm != i2->CRm) return i1->CRm - i2->CRm; if (i1->Op1 != i2->Op1) return i1->Op1 - i2->Op1; - return i1->Op2 - i2->Op2; + if (i1->Op2 != i2->Op2) + return i1->Op2 - i2->Op2; + return i2->is_64 - i1->is_64; } @@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define is64 .is_64 = true #define is32 .is_64 = false +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); + #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd6..e6f4ae48bda9 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,101 +17,12 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_host.h> -#include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> #include <linux/init.h> -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, + access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { @@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 000000000000..17fc7cd479d3 --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * Jonathan Austin <jonathan.austin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include <linux/kvm_host.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> +#include <linux/init.h> + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da51..d6c005283678 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d03612181..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -109,6 +110,73 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return -EFAULT; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +189,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -133,6 +202,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -140,6 +210,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -153,6 +228,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -166,6 +244,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } @@ -183,13 +264,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __attribute_const__ kvm_target_cpu(void) { - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: return KVM_ARM_TARGET_CORTEX_A15; default: @@ -202,7 +279,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; @@ -222,6 +299,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3d74a0be47db..4c979d466cc1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@ #include "trace.h" -#include "trace.h" - typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) @@ -40,21 +38,22 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + int ret; + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); - if (kvm_psci_call(vcpu)) + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); return 1; + } - kvm_inject_undefined(vcpu); - return 1; + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } @@ -76,23 +75,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer * @run: the kvm_run structure pointer * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, [HSR_EC_CP14_MR] = kvm_handle_cp14_access, diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index f048338135f7..2cc14dfad049 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -71,7 +71,7 @@ __do_hyp_init: bne phase2 @ Yes, second stage init @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. @@ -137,12 +137,12 @@ phase2: mov pc, r0 target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 isb @ Invalidate the old TLBs mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb + dsb ish eret diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 16cd4ba5d7fd..01dcb0e752d9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -52,10 +52,10 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) - dsb + dsb ish isb mov r2, #0 mov r3, #0 @@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context) mcr p15, 4, r0, c8, c3, 4 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ mcr p15, 0, r0, c7, c1, 0 - dsb + dsb ish isb @ Not necessary if followed by eret bx lr @@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run) ldr r1, [vcpu, #VCPU_KVM] add r1, r1, #KVM_VTTBR ldrd r2, r3, [r1] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR @ We're all done, just restore the GPRs and go to the guest restore_guest_regs @@ -199,8 +199,13 @@ after_vfp_restore: restore_host_regs clrex @ Clear exclusive monitor +#ifndef CONFIG_CPU_ENDIAN_BE8 mov r0, r1 @ Return the return code mov r1, #0 @ Clear upper bits in return value +#else + @ r1 already has return code + mov r0, #0 @ Clear upper bits in return value +#endif /* CONFIG_CPU_ENDIAN_BE8 */ bx lr @ return to IOCTL /******************************************************************** @@ -220,6 +225,10 @@ after_vfp_restore: * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 and r1. * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save @@ -363,6 +372,11 @@ hyp_hvc: host_switch_to_hyp: pop {r0, r1, r2} + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + push {lr} mrs lr, SPSR push {lr} @@ -378,7 +392,7 @@ THUMB( orr lr, #1) pop {lr} msr SPSR_csxf, lr pop {lr} - eret +1: eret guest_trap: load_vcpu @ Load VCPU pointer to r0 @@ -492,10 +506,10 @@ __kvm_hyp_code_end: .section ".rodata" und_die_str: - .ascii "unexpected undefined exception in Hyp mode at: %#08x" + .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" pabt_die_str: - .ascii "unexpected prefetch abort in Hyp mode at: %#08x" + .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" dabt_die_str: - .ascii "unexpected data abort in Hyp mode at: %#08x" + .ascii "unexpected data abort in Hyp mode at: %#08x\n" svc_die_str: - .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6f18695a09cb..98c8c5b9a87f 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,4 +1,5 @@ #include <linux/irqchip/arm-gic.h> +#include <asm/assembler.h> #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -303,13 +304,17 @@ vcpu .req r0 @ vcpu pointer always in r0 mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL mrrc p15, 0, r4, r5, c7 @ PAR + mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 + mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \store_to_vcpu == 0 - push {r2,r4-r5} + push {r2,r4-r7} .else str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) strd r4, r5, [r12] + str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif .endm @@ -322,15 +327,19 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 - pop {r2,r4-r5} + pop {r2,r4-r7} .else ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) ldrd r4, r5, [r12] + ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL mcrr p15, 0, r4, r5, c7 @ PAR + mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 + mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \read_from_vcpu == 0 pop {r2-r12} @@ -412,15 +421,23 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r8, [r2, #GICH_ELRSR0] ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] - - str r3, [r11, #VGIC_CPU_HCR] - str r4, [r11, #VGIC_CPU_VMCR] - str r5, [r11, #VGIC_CPU_MISR] - str r6, [r11, #VGIC_CPU_EISR] - str r7, [r11, #(VGIC_CPU_EISR + 4)] - str r8, [r11, #VGIC_CPU_ELRSR] - str r9, [r11, #(VGIC_CPU_ELRSR + 4)] - str r10, [r11, #VGIC_CPU_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r5, r5 ) +ARM_BE8(rev r6, r6 ) +ARM_BE8(rev r7, r7 ) +ARM_BE8(rev r8, r8 ) +ARM_BE8(rev r9, r9 ) +ARM_BE8(rev r10, r10 ) + + str r3, [r11, #VGIC_V2_CPU_HCR] + str r4, [r11, #VGIC_V2_CPU_VMCR] + str r5, [r11, #VGIC_V2_CPU_MISR] + str r6, [r11, #VGIC_V2_CPU_EISR] + str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] + str r8, [r11, #VGIC_V2_CPU_ELRSR] + str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ mov r5, #0 @@ -428,9 +445,10 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Save list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 +ARM_BE8(rev r6, r6 ) str r6, [r3], #4 subs r4, r4, #1 bne 1b @@ -455,9 +473,12 @@ vcpu .req r0 @ vcpu pointer always in r0 add r11, vcpu, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr r3, [r11, #VGIC_CPU_HCR] - ldr r4, [r11, #VGIC_CPU_VMCR] - ldr r8, [r11, #VGIC_CPU_APR] + ldr r3, [r11, #VGIC_V2_CPU_HCR] + ldr r4, [r11, #VGIC_V2_CPU_VMCR] + ldr r8, [r11, #VGIC_V2_CPU_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r8, r8 ) str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -465,9 +486,10 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Restore list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 +ARM_BE8(rev r6, r6 ) str r6, [r2], #4 subs r4, r4, #1 bne 1b @@ -498,7 +520,7 @@ vcpu .req r0 @ vcpu pointer always in r0 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL isb - mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 strd r2, r3, [r5] @@ -538,12 +560,12 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r2, [r4, #KVM_TIMER_CNTVOFF] ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] - mcrr p15, 4, r2, r3, c14 @ CNTVOFF + mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 ldrd r2, r3, [r5] - mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL isb ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] @@ -597,17 +619,14 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ .macro configure_hyp_role operation - mrc p15, 4, r2, c1, c1, 0 @ HCR - bic r2, r2, #HCR_VIRT_EXCP_MASK - ldr r3, =HCR_GUEST_MASK .if \operation == vmentry - orr r2, r2, r3 + ldr r2, [vcpu, #VCPU_HCR] ldr r3, [vcpu, #VCPU_IRQ_LINES] orr r2, r2, r3 .else - bic r2, r2, r3 + mov r2, #0 .endif - mcr p15, 4, r2, c1, c1, 0 + mcr p15, 4, r2, c1, c1, 0 @ HCR .endm .macro load_vcpu diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 72a12f2171b2..4cb5a93182e9 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -23,6 +23,68 @@ #include "trace.h" +static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = buf[0]; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + /** * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation * @vcpu: The VCPU pointer @@ -33,28 +95,27 @@ */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long *dest; + unsigned long data; unsigned int len; int mask; if (!run->mmio.is_write) { - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - *dest = 0; - len = run->mmio.len; if (len > sizeof(unsigned long)) return -EINVAL; - memcpy(dest, run->mmio.data, len); - - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - *((u64 *)run->mmio.data)); + data = mmio_read_buf(run->mmio.data, len); if (vcpu->arch.mmio_decode.sign_extend && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); - *dest = (*dest ^ mask) - mask; + data = (data ^ mask) - mask; } + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + data); + data = vcpu_data_host_to_guest(vcpu, data, len); + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; } return 0; @@ -63,7 +124,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_exit_mmio *mmio) { - unsigned long rt, len; + unsigned long rt; + int len; bool is_write, sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { @@ -86,12 +148,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - if (kvm_vcpu_reg_is_pc(vcpu, rt)) { - /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - mmio->is_write = is_write; mmio->phys_addr = fault_ipa; mmio->len = len; @@ -110,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { struct kvm_exit_mmio mmio; + unsigned long data; unsigned long rt; int ret; @@ -130,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); + trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, fault_ipa, - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); + (mmio.is_write) ? data : 0); if (mmio.is_write) - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); + mmio_write_buf(mmio.data, mmio.len, data); if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index e04613906f1b..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> +#include <linux/hugetlb.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> @@ -41,6 +42,10 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -85,9 +90,19 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } +static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); + pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { pmd_t *pmd_table = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); pmd_free(NULL, pmd_table); @@ -97,73 +112,186 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(kvm_pmd_huge(*pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); pte_free_kernel(NULL, pte_table); put_page(virt_to_page(pmd)); } -static bool pmd_empty(pmd_t *pmd) +static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) { - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (kvm_pte_table_empty(start_pte)) + clear_pmd_entry(kvm, pmd, start_addr); } -static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +static void unmap_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); + } else { + unmap_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); + + if (kvm_pmd_table_empty(start_pmd)) + clear_pud_entry(kvm, pud, start_addr); } -static bool pte_empty(pte_t *pte) +static void unmap_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) { - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pud)); + } else { + unmap_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (kvm_pud_table_empty(start_pud)) + clear_pgd_entry(kvm, pgd, start_addr); } + static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - unsigned long long start, u64 size) + phys_addr_t start, u64 size) { pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + pgd = pgdp + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + unmap_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ pte_t *pte; - unsigned long long addr = start, end = start + size; - u64 range; - while (addr < end) { - pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); } + } while (pte++, addr += PAGE_SIZE, addr != end); +} - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); + } else { + stage2_flush_ptes(kvm, pmd, addr, next); + } } + } while (pmd++, addr = next, addr != end); +} - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(kvm, pmd, addr); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(kvm, pud, addr); - range = PUD_SIZE; +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); + } else { + stage2_flush_pmds(kvm, pud, addr, next); } } + } while (pud++, addr = next, addr != end); +} - addr += range; - } +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } /** @@ -178,14 +306,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -215,7 +343,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -404,9 +532,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - /* stage-2 pgd must be aligned to its size */ - VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; @@ -451,29 +576,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); } - pmd = pmd_offset(pud, addr); + return pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; - /* Create 2nd stage page table mapping - Level 2 */ + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; + + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ @@ -520,7 +687,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); - kvm_set_s2pte_writable(&pte); ret = mmu_topup_memory_cache(&cache, 2, 2); if (ret) @@ -539,23 +705,97 @@ out: return ret; } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; + pgprot_t mem_type = PAGE_S2; - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. + */ + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) + force_pte = true; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -573,26 +813,44 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); + if (kvm_is_mmio_pfn(pfn)) + mem_type = PAGE_S2_DEVICE; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - kvm_set_s2pte_writable(&new_pte); - kvm_set_pfn_dirty(pfn); + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, mem_type); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, mem_type); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, + mem_type == PAGE_S2_DEVICE); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -612,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - bool is_iabt; + unsigned long hva; + bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -623,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault(vcpu); + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", - kvm_vcpu_trap_get_class(vcpu), fault_status); + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); return -EFAULT; } idx = srcu_read_lock(&vcpu->kvm->srcu); gfn = fault_ipa >> PAGE_SHIFT; - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); @@ -641,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - if (fault_status != FSC_FAULT) { - kvm_err("Unsupported fault status on io memory: %#lx\n", - fault_status); - ret = -EFAULT; - goto out_unlock; - } - /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the @@ -659,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - memslot = gfn_to_memslot(vcpu->kvm, gfn); - - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out_unlock: @@ -779,9 +1034,9 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); - hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { /* @@ -791,7 +1046,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -808,7 +1063,7 @@ int kvm_mmu_init(void) */ kvm_flush_dcache_to_poc(init_bounce_page, len); - phys_base = virt_to_phys(init_bounce_page); + phys_base = kvm_virt_to_phys(init_bounce_page); hyp_idmap_vector += phys_base - hyp_idmap_start; hyp_idmap_start = phys_base; hyp_idmap_end = phys_base + len; @@ -817,8 +1072,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; @@ -864,3 +1120,49 @@ out: free_hyp_pgds(); return err; } + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) +{ + gpa_t gpa = old->base_gfn << PAGE_SHIFT; + phys_addr_t size = old->npages << PAGE_SHIFT; + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a3667..09cf37737ee2 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/wait.h> +#include <asm/cputype.h> #include <asm/kvm_emulate.h> #include <asm/kvm_psci.h> @@ -26,6 +27,36 @@ * as described in ARM document number ARM DEN 0022A. */ +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + + return PSCI_RET_SUCCESS; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long context_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) - return KVM_PSCI_RET_INVAL; - - target_pc = *vcpu_reg(source_vcpu, 2); + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } - vcpu = kvm_get_vcpu(kvm, cpu_id); + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.pause) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; + target_pc = *vcpu_reg(source_vcpu, 2); + context_id = *vcpu_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu_set_thumb(vcpu); } + /* Propagate caller endianness */ + if (kvm_vcpu_is_be(source_vcpu)) + kvm_vcpu_set_be(vcpu); + *vcpu_pc(vcpu) = target_pc; + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ + *vcpu_reg(vcpu, 0) = context_id; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); - return KVM_PSCI_RET_SUCCESS; + return PSCI_RET_SUCCESS; } -/** - * kvm_psci_call - handle PSCI call if r0 value is in range - * @vcpu: Pointer to the VCPU struct - * - * Handle PSCI calls from guests through traps from HVC or SMC instructions. - * The calling convention is similar to SMC calls to the secure world where - * the function number is placed in r0 and this function returns true if the - * function number specified in r0 is withing the PSCI range, and false - * otherwise. - */ -bool kvm_psci_call(struct kvm_vcpu *vcpu) +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = *vcpu_reg(vcpu, 1); + lowest_affinity_level = *vcpu_reg(vcpu, 2); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if (((mpidr & target_affinity_mask) == target_affinity) && + !tmp->arch.pause) { + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + +int kvm_psci_version(struct kvm_vcpu *vcpu) +{ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_0_2; + + return KVM_ARM_PSCI_0_1; +} + +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + int ret = 1; + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = 2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN64_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We should'nt be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidently/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + default: + return -EINVAL; + } + + *vcpu_reg(vcpu, 0) = val; + return ret; +} + +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu) switch (psci_fn) { case KVM_PSCI_FN_CPU_OFF: kvm_psci_vcpu_off(vcpu); - val = KVM_PSCI_RET_SUCCESS; + val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; case KVM_PSCI_FN_CPU_SUSPEND: case KVM_PSCI_FN_MIGRATE: - val = KVM_PSCI_RET_NI; + val = PSCI_RET_NOT_SUPPORTED; break; - default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC instructions. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +int kvm_psci_call(struct kvm_vcpu *vcpu) +{ + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); + default: + return -EINVAL; + }; } diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..f558c073c023 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,16 +27,21 @@ #include <asm/kvm_arm.h> #include <asm/kvm_coproc.h> +#include <kvm/arm_arch_timer.h> + /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; - -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level cortexa_vtimer_irq = { + { .irq = 27 }, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -51,24 +56,28 @@ static struct kvm_regs a15_regs_reset = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) - return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index a8e73ed5ad5b..b1d640f78623 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " - "ipa %#16llx, hsr %#08lx", - __entry->vcpu_pc, __entry->hxfar, - __entry->ipa, __entry->hsr) + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) ); TRACE_EVENT(kvm_irq_line, diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c deleted file mode 100644 index 17c5ac7d10ed..000000000000 --- a/arch/arm/kvm/vgic.c +++ /dev/null @@ -1,1499 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -/* - * How the whole thing works (courtesy of Christoffer Dall): - * - * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. - * - Every time the bitmap changes, the irq_pending_on_cpu oracle is - * recalculated - * - To calculate the oracle, we need info for each cpu from - * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR - * registers, stored on each vcpu. We only keep one bit of - * information per interrupt, making sure that only one vcpu can - * accept the interrupt. - * - The same is true when injecting an interrupt, except that we only - * consider a single interrupt at a time. The irq_spi_cpu array - * contains the target CPU for each SPI. - * - * The handling of level interrupts adds some extra complexity. We - * need to track when the interrupt has been EOIed, so we can sample - * the 'line' again. This is achieved as such: - * - * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line - * will be ignored for further interrupts. The interrupt is injected - * into the vcpu with the GICH_LR_EOI bit set (generate a - * maintenance interrupt on EOI). - * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the - * interrupt line to be sampled again. - */ - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_update_state(struct kvm *kvm); -static void vgic_kick_vcpus(struct kvm *kvm); -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); -static u32 vgic_nr_lr; - -static unsigned int vgic_maint_irq; - -static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, - int cpuid, u32 offset) -{ - offset >>= 2; - if (!offset) - return x->percpu[cpuid].reg; - else - return x->shared.reg + offset - 1; -} - -static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, - int cpuid, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); - - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); -} - -static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) -{ - unsigned long *reg; - - if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; - } else { - reg = x->shared.reg_ul; - irq -= VGIC_NR_PRIVATE_IRQS; - } - - if (val) - set_bit(irq, reg); - else - clear_bit(irq, reg); -} - -static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) -{ - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; - return x->percpu[cpuid].reg_ul; -} - -static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) -{ - return x->shared.reg_ul; -} - -static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) -{ - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; -} - -#define VGIC_CFG_LEVEL 0 -#define VGIC_CFG_EDGE 1 - -static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int irq_val; - - irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); - return irq_val == VGIC_CFG_EDGE; -} - -static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); -} - -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); -} - -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); -} - -static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - set_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - clear_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return *((u32 *)mmio->data) & mask; -} - -static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = value & mask; -} - -/** - * vgic_reg_access - access vgic register - * @mmio: pointer to the data describing the mmio access - * @reg: pointer to the virtual backing of vgic distributor data - * @offset: least significant 2 bits used for word offset - * @mode: ACCESS_ mode (see defines above) - * - * Helper to make vgic register access easier using one of the access - * modes defined for vgic register access - * (read,raz,write-ignored,setbit,clearbit,write) - */ -static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) -{ - int word_offset = (offset & 3) * 8; - u32 mask = (1UL << (mmio->len * 8)) - 1; - u32 regval; - - /* - * Any alignment fault should have been delivered to the guest - * directly (ARM ARM B3.12.7 "Prioritization of aborts"). - */ - - if (reg) { - regval = *reg; - } else { - BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); - regval = 0; - } - - if (mmio->is_write) { - u32 data = mmio_data_read(mmio, mask) << word_offset; - switch (ACCESS_WRITE_MASK(mode)) { - case ACCESS_WRITE_IGNORED: - return; - - case ACCESS_WRITE_SETBIT: - regval |= data; - break; - - case ACCESS_WRITE_CLEARBIT: - regval &= ~data; - break; - - case ACCESS_WRITE_VALUE: - regval = (regval & ~(mask << word_offset)) | data; - break; - } - *reg = regval; - } else { - switch (ACCESS_READ_MASK(mode)) { - case ACCESS_READ_RAZ: - regval = 0; - /* fall through */ - - case ACCESS_READ_VALUE: - mmio_data_write(mmio, mask, regval >> word_offset); - } - } -} - -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* IIDR */ - reg = 0x4B00043B; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - -static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(vcpu); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static u32 vgic_cfg_expand(u16 val) -{ - u32 res = 0; - int i; - - /* - * Turn a 16bit value like abcd...mnop into a 32bit word - * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. - */ - for (i = 0; i < 16; i++) - res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); - - return res; -} - -static u16 vgic_cfg_compress(u32 val) -{ - u16 res = 0; - int i; - - /* - * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like - * abcd...mnop which is what we really care about. - */ - for (i = 0; i < 16; i++) - res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; - - return res; -} - -/* - * The distributor uses 2 bits per IRQ for the CFG register, but the - * LSB is always 0. As such, we only keep the upper bit, and use the - * two above functions to compress/expand the bits - */ -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - if (offset & 2) - val = *reg >> 16; - else - val = *reg & 0xffff; - - val = vgic_cfg_expand(val); - vgic_reg_access(mmio, &val, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - if (offset < 4) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ - return false; - } - - val = vgic_cfg_compress(val); - if (offset & 2) { - *reg &= 0xffff; - *reg |= val << 16; - } else { - *reg &= 0xffff << 16; - *reg |= val; - } - } - - return false; -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -/* - * I would have liked to use the kvm_bus_io_*() API instead, but it - * cannot cope with banked registers (only the VM pointer is passed - * around, and we need the vcpu). One of these days, someone please - * fix it! - */ -struct mmio_range { - phys_addr_t base; - unsigned long len; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -static const struct mmio_range vgic_ranges[] = { - { - .base = GIC_DIST_CTRL, - .len = 12, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - {} -}; - -static const -struct mmio_range *find_matching_range(const struct mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t base) -{ - const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; - - while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) - return r; - r++; - } - - return NULL; -} - -/** - * vgic_handle_mmio - handle an in-kernel MMIO access - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - const struct mmio_range *range; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long base = dist->vgic_dist_base; - bool updated_state; - unsigned long offset; - - if (!irqchip_in_kernel(vcpu->kvm) || - mmio->phys_addr < base || - (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - range = find_matching_range(vgic_ranges, mmio, base); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; - } - - spin_lock(&vcpu->kvm->arch.vgic.lock); - offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); - kvm_handle_mmio_return(vcpu, run); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return true; -} - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pending, *enabled, *pend_percpu, *pend_shared; - unsigned long pending_private, pending_shared; - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; - pend_shared = vcpu->arch.vgic_cpu.pending_shared; - - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - - pending = vgic_bitmap_get_shared_map(&dist->irq_state); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); - bitmap_and(pend_shared, pend_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); - - pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); - return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); -} - -/* - * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. - */ -static void vgic_update_state(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int c; - - if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); - return; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); - } - } -} - -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) - -/* - * An interrupt may have been disabled after being made pending on the - * CPU interface (the classic case is a timer running while we're - * rebooting the guest - the interrupt would kick as soon as the CPU - * interface gets enabled, with deadly consequences). - * - * The solution is to examine already active LRs, and check the - * interrupt is still enabled. If not, just retire it. - */ -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); - } - } -} - -/* - * Queue an interrupt to a CPU virtual interface. Return true on success, - * or false if it wasn't possible to queue it. - */ -static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - /* Sanitize the input... */ - BUG_ON(sgi_source_id & ~7); - BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); - - kvm_debug("Queue IRQ%d\n", irq); - - lr = vgic_cpu->vgic_irq_lr_map[irq]; - - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; - } - - /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) - return false; - - kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); - - if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; - - return true; -} - -static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = dist->irq_sgi_sources[vcpu_id][irq]; - - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - dist->irq_sgi_sources[vcpu_id][irq] = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. - */ - if (!sources) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) -{ - if (vgic_irq_is_active(vcpu, irq)) - return true; /* level interrupt, already queued */ - - if (vgic_queue_irq(vcpu, 0, irq)) { - if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - } else { - vgic_irq_set_active(vcpu, irq); - } - - return true; - } - - return false; -} - -/* - * Fill the list registers with pending interrupts before running the - * guest. - */ -static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i, vcpu_id; - int overflow = 0; - - vcpu_id = vcpu->vcpu_id; - - /* - * We may not have any pending interrupt, or the interrupts - * may have been serviced from another vcpu. In all cases, - * move along. - */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); - goto epilog; - } - - /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { - if (!vgic_queue_sgi(vcpu, i)) - overflow = 1; - } - - /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { - if (!vgic_queue_hwirq(vcpu, i)) - overflow = 1; - } - - /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { - if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) - overflow = 1; - } - -epilog: - if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; - } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - /* - * We're about to run this VCPU, and we've consumed - * everything the distributor had in store for - * us. Claim we don't have anything pending. We'll - * adjust that if needed while exiting. - */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); - } -} - -static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - bool level_pending = false; - - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { - /* - * Some level interrupts have been EOIed. Clear their - * active bit. - */ - int lr, irq; - - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, - vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); - level_pending = true; - } else { - vgic_cpu_irq_clear(vcpu, irq); - } - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; - } - } - - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - - return level_pending; -} - -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ -static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int lr, pending; - bool level_pending; - - level_pending = vgic_process_maintenance(vcpu); - - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr) { - int irq; - - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) - continue; - - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - } - - /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_flush_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -static void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... - */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) -{ - int is_edge = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); - - /* - * Only inject an interrupt if: - * - edge triggered and we have a rising edge - * - level triggered and we change level - */ - if (is_edge) - return level > state; - else - return level != state; -} - -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int is_edge, is_level; - int enabled; - bool ret = true; - - spin_lock(&dist->lock); - - vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; - - if (!vgic_validate_injection(vcpu, irq_num, level)) { - ret = false; - goto out; - } - - if (irq_num >= VGIC_NR_PRIVATE_IRQS) { - cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; - vcpu = kvm_get_vcpu(kvm, cpuid); - } - - kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - - if (level) - vgic_dist_irq_set(vcpu, irq_num); - else - vgic_dist_irq_clear(vcpu, irq_num); - - enabled = vgic_irq_is_enabled(vcpu, irq_num); - - if (!enabled) { - ret = false; - goto out; - } - - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { - /* - * Level interrupt in progress, will be picked up - * when EOId. - */ - ret = false; - goto out; - } - - if (level) { - vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); - } - -out: - spin_unlock(&dist->lock); - - return ret; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: activates an interrupt - * false: deactivates an interrupt - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level) -{ - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) - vgic_kick_vcpus(kvm); - - return 0; -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) - return -EBUSY; - - for (i = 0; i < VGIC_NR_IRQS; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_vmcr = 0; - - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ - - return 0; -} - -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic_maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -int kvm_vgic_hyp_init(void) -{ - int ret; - struct resource vctrl_res; - struct resource vcpu_res; - - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); - if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); - return -ENODEV; - } - - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } - - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); - goto out; - } - - ret = register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic_vcpu_base = vcpu_res.start; - - goto out; - -out_unmap: - iounmap(vgic_vctrl_base); -out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); - return ret; -} - -int kvm_vgic_init(struct kvm *kvm) -{ - int ret = 0, i; - - mutex_lock(&kvm->lock); - - if (vgic_initialized(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; - } - - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) - vgic_set_target_reg(kvm, 0, i); - - kvm_timer_init(kvm); - kvm->arch.vgic.ready = true; -out: - mutex_unlock(&kvm->lock); - return ret; -} - -int kvm_vgic_create(struct kvm *kvm) -{ - int ret = 0; - - mutex_lock(&kvm->lock); - - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { - ret = -EEXIST; - goto out; - } - - spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - -out: - mutex_unlock(&kvm->lock); - return ret; -} - -static bool vgic_ioaddr_overlap(struct kvm *kvm) -{ - phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; - phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; - - if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) - return 0; - if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || - (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) - return -EBUSY; - return 0; -} - -static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t size) -{ - int ret; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - if (addr + size < addr) - return -EINVAL; - - ret = vgic_ioaddr_overlap(kvm); - if (ret) - return ret; - *ioaddr = addr; - return ret; -} - -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); - break; - default: - r = -ENODEV; - } - - mutex_unlock(&kvm->lock); - return r; -} diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb007..3bc8eb811a73 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -197,24 +197,24 @@ 12: PLD( pld [r1, #124] ) 13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull + mov r3, lr, lspull #\pull subs r2, r2, #32 ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f bge 12b PLD( cmn r2, #96 ) @@ -225,10 +225,10 @@ 14: ands ip, r2, #28 beq 16f -15: mov r3, lr, pull #\pull +15: mov r3, lr, lspull #\pull ldr1w r1, lr, abort=21f subs ip, ip, #4 - orr r3, r3, lr, push #\push + orr r3, r3, lr, lspush #\push str1w r0, r3, abort=21f bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a09..d6e742d24007 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -141,7 +141,7 @@ FN_ENTRY tst len, #2 mov r5, r4, get_byte_0 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 @@ -171,23 +171,23 @@ FN_ENTRY cmp ip, #2 beq .Lsrc2_aligned bhi .Lsrc3_aligned - mov r4, r5, pull #8 @ C = 0 + mov r4, r5, lspull #8 @ C = 0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - mov r7, r7, pull #8 - orr r7, r7, r8, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #8 + mov r4, r8, lspull #8 sub ip, ip, #16 teq ip, #0 bne 1b @@ -196,50 +196,50 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #8 + mov r4, r6, lspull #8 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #24 + orr r4, r4, r5, lspush #24 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #8 + mov r4, r5, lspull #8 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 tst len, #2 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 b .Lexit -.Lsrc2_aligned: mov r4, r5, pull #16 +.Lsrc2_aligned: mov r4, r5, lspull #16 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - mov r7, r7, pull #16 - orr r7, r7, r8, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #16 + mov r4, r8, lspull #16 sub ip, ip, #16 teq ip, #0 bne 1b @@ -248,20 +248,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #16 + mov r4, r6, lspull #16 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #16 + orr r4, r4, r5, lspush #16 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #16 + mov r4, r5, lspull #16 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -276,24 +276,24 @@ FN_ENTRY load1b r5 b .Lexit -.Lsrc3_aligned: mov r4, r5, pull #24 +.Lsrc3_aligned: mov r4, r5, lspull #24 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - mov r7, r7, pull #24 - orr r7, r7, r8, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #24 + mov r4, r8, lspull #24 sub ip, ip, #16 teq ip, #0 bne 1b @@ -302,20 +302,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #24 + mov r4, r6, lspull #24 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #8 + orr r4, r4, r5, lspush #8 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #24 + mov r4, r5, lspull #24 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -326,7 +326,7 @@ FN_ENTRY load1l r4 mov r5, r4, get_byte_0 strb r5, [dst], #1 - adcs sum, sum, r4, push #24 + adcs sum, sum, r4, lspush #24 mov r5, r4, get_byte_1 b .Lexit FN_EXIT diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4b..7a7430950c79 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl) strb ip, [r1], #1 4: subs r2, r2, #1 - mov ip, r3, pull #24 + mov ip, r3, lspull #24 ldrne r3, [r0] - orrne ip, ip, r3, push #8 + orrne ip, ip, r3, lspush #8 strne ip, [r1], #4 bne 4b b 8f 5: subs r2, r2, #1 - mov ip, r3, pull #16 + mov ip, r3, lspull #16 ldrne r3, [r0] - orrne ip, ip, r3, push #16 + orrne ip, ip, r3, lspush #16 strne ip, [r1], #4 bne 5b b 7f 6: subs r2, r2, #1 - mov ip, r3, pull #8 + mov ip, r3, lspull #8 ldrne r3, [r0] - orrne ip, ip, r3, push #24 + orrne ip, ip, r3, lspush #24 strne ip, [r1], #4 bne 6b diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725c..d0d104a0dd11 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,26 @@ ENTRY(__raw_writesl) blt 5f bgt 6f -4: mov ip, r3, pull #16 +4: mov ip, r3, lspull #16 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #16 + orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b mov pc, lr -5: mov ip, r3, pull #8 +5: mov ip, r3, lspull #8 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #24 + orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b mov pc, lr -6: mov ip, r3, pull #24 +6: mov ip, r3, lspull #24 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #8 + orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b mov pc, lr diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 938fc14f962d..d1fc0c0c342c 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -147,24 +147,24 @@ ENTRY(memmove) 12: PLD( pld [r1, #-128] ) 13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push + mov lr, r3, lspush #\push subs r2, r2, #32 ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull stmdb r0!, {r4 - r9, ip, lr} bge 12b PLD( cmn r2, #96 ) @@ -175,10 +175,10 @@ ENTRY(memmove) 14: ands ip, r2, #28 beq 16f -15: mov lr, r3, push #\push +15: mov lr, r3, lspush #\push ldr r3, [r1, #-4]! subs ip, ip, #4 - orr lr, lr, r3, pull #\pull + orr lr, lr, r3, lspull #\pull str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index 5c908b1cb8ed..e50520904b76 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -117,9 +117,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 ldr r7, [r1], #4 - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -131,30 +131,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_1rem8lp -.Lc2u_1cpy8lp: mov r3, r7, pull #8 +.Lc2u_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_1cpy8lp .Lc2u_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi @@ -172,9 +172,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 ldr r7, [r1], #4 - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -186,30 +186,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_2rem8lp -.Lc2u_2cpy8lp: mov r3, r7, pull #16 +.Lc2u_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_2cpy8lp .Lc2u_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi @@ -227,9 +227,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 ldr r7, [r1], #4 - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -241,30 +241,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_3rem8lp -.Lc2u_3cpy8lp: mov r3, r7, pull #24 +.Lc2u_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_3cpy8lp .Lc2u_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi @@ -382,9 +382,9 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault .Lcfu_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -396,30 +396,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_1rem8lp -.Lcfu_1cpy8lp: mov r3, r7, pull #8 +.Lcfu_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} bpl .Lcfu_1cpy8lp .Lcfu_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_1fupi @@ -437,9 +437,9 @@ USER( TUSER( ldrne) r7, [r1], #4) @ May fault .Lcfu_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -452,30 +452,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault blt .Lcfu_2rem8lp -.Lcfu_2cpy8lp: mov r3, r7, pull #16 +.Lcfu_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} bpl .Lcfu_2cpy8lp .Lcfu_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_2fupi @@ -493,9 +493,9 @@ USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault .Lcfu_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -507,30 +507,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_3rem8lp -.Lcfu_3cpy8lp: mov r3, r7, pull #24 +.Lcfu_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} subs ip, ip, #16 bpl .Lcfu_3cpy8lp .Lcfu_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_3fupi diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index c61d2373408c..990837379112 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -10,6 +10,7 @@ #include <asm/system_info.h> pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, @@ -74,8 +75,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, unsigned long addr, end; unsigned long next; - addr = virt_to_phys(text_start); - end = virt_to_phys(text_end); + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e0a9b48b02c3..279594b83781 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -443,6 +443,8 @@ source "drivers/firmware/Kconfig" source "fs/Kconfig" +source "arch/arm64/kvm/Kconfig" + source "arch/arm64/Kconfig.debug" source "security/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8f63c8a21b7e..8a311839e2d7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -43,6 +43,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index c404fb0df3a6..27f54a7cc81b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,6 +41,7 @@ #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 +#define ARM_CPU_PART_CORTEX_A53 0xD030 #define ARM_CPU_PART_CORTEX_A57 0xD070 #define APM_CPU_PART_POTENZA 0x0000 diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7c951a510b54..aab72ce22348 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,15 @@ #ifdef __KERNEL__ +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ @@ -73,11 +82,6 @@ #define CACHE_FLUSH_IS_SAFE 1 -enum debug_el { - DBG_ACTIVE_EL0 = 0, - DBG_ACTIVE_EL1, -}; - /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 @@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); +enum debug_el { + DBG_ACTIVE_EL0 = 0, + DBG_ACTIVE_EL1, +}; + void enable_debug_monitors(enum debug_el el); void disable_debug_monitors(enum debug_el el); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h new file mode 100644 index 000000000000..7fd3e27e3ccc --- /dev/null +++ b/arch/arm64/include/asm/kvm_arm.h @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_ARM_H__ +#define __ARM64_KVM_ARM_H__ + +#include <asm/types.h> + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_ID (UL(1) << 33) +#define HCR_CD (UL(1) << 32) +#define HCR_RW_SHIFT 31 +#define HCR_RW (UL(1) << HCR_RW_SHIFT) +#define HCR_TRVM (UL(1) << 30) +#define HCR_HCD (UL(1) << 29) +#define HCR_TDZ (UL(1) << 28) +#define HCR_TGE (UL(1) << 27) +#define HCR_TVM (UL(1) << 26) +#define HCR_TTLB (UL(1) << 25) +#define HCR_TPU (UL(1) << 24) +#define HCR_TPC (UL(1) << 23) +#define HCR_TSW (UL(1) << 22) +#define HCR_TAC (UL(1) << 21) +#define HCR_TIDCP (UL(1) << 20) +#define HCR_TSC (UL(1) << 19) +#define HCR_TID3 (UL(1) << 18) +#define HCR_TID2 (UL(1) << 17) +#define HCR_TID1 (UL(1) << 16) +#define HCR_TID0 (UL(1) << 15) +#define HCR_TWE (UL(1) << 14) +#define HCR_TWI (UL(1) << 13) +#define HCR_DC (UL(1) << 12) +#define HCR_BSU (3 << 10) +#define HCR_BSU_IS (UL(1) << 10) +#define HCR_FB (UL(1) << 9) +#define HCR_VA (UL(1) << 8) +#define HCR_VI (UL(1) << 7) +#define HCR_VF (UL(1) << 6) +#define HCR_AMO (UL(1) << 5) +#define HCR_IMO (UL(1) << 4) +#define HCR_FMO (UL(1) << 3) +#define HCR_PTW (UL(1) << 2) +#define HCR_SWIO (UL(1) << 1) +#define HCR_VM (UL(1) << 0) + +/* + * The bits we set in HCR: + * RW: 64bit by default, can be overriden for 32bit VMs + * TAC: Trap ACTLR + * TSC: Trap SMC + * TVM: Trap VM ops (until M+C set in SCTLR_EL1) + * TSW: Trap cache operations by set/way + * TWE: Trap WFE + * TWI: Trap WFI + * TIDCP: Trap L2CTLR/L2ECTLR + * BSU_IS: Upgrade barriers to the inner shareable domain + * FB: Force broadcast of all maintainance operations + * AMO: Override CPSR.A and enable signaling with VA + * IMO: Override CPSR.I and enable signaling with VI + * FMO: Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate + */ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ + HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) +#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) + + +/* Hyp System Control Register (SCTLR_EL2) bits */ +#define SCTLR_EL2_EE (1 << 25) +#define SCTLR_EL2_WXN (1 << 19) +#define SCTLR_EL2_I (1 << 12) +#define SCTLR_EL2_SA (1 << 3) +#define SCTLR_EL2_C (1 << 2) +#define SCTLR_EL2_A (1 << 1) +#define SCTLR_EL2_M 1 +#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ + SCTLR_EL2_SA | SCTLR_EL2_I) + +/* TCR_EL2 Registers bits */ +#define TCR_EL2_TBI (1 << 20) +#define TCR_EL2_PS (7 << 16) +#define TCR_EL2_PS_40B (2 << 16) +#define TCR_EL2_TG0 (1 << 14) +#define TCR_EL2_SH0 (3 << 12) +#define TCR_EL2_ORGN0 (3 << 10) +#define TCR_EL2_IRGN0 (3 << 8) +#define TCR_EL2_T0SZ 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ + TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) + +#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) + +/* VTCR_EL2 Registers bits */ +#define VTCR_EL2_PS_MASK (7 << 16) +#define VTCR_EL2_TG0_MASK (1 << 14) +#define VTCR_EL2_TG0_4K (0 << 14) +#define VTCR_EL2_TG0_64K (1 << 14) +#define VTCR_EL2_SH0_MASK (3 << 12) +#define VTCR_EL2_SH0_INNER (3 << 12) +#define VTCR_EL2_ORGN0_MASK (3 << 10) +#define VTCR_EL2_ORGN0_WBWA (1 << 10) +#define VTCR_EL2_IRGN0_MASK (3 << 8) +#define VTCR_EL2_IRGN0_WBWA (1 << 8) +#define VTCR_EL2_SL0_MASK (3 << 6) +#define VTCR_EL2_SL0_LVL1 (1 << 6) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_T0SZ_40B 24 + +/* + * We configure the Stage-2 page tables to always restrict the IPA space to be + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are + * not known to exist and will break with this configuration. + * + * Note that when using 4K pages, we concatenate two first level page tables + * together. + * + * The magic numbers used for VTTBR_X in this patch can be found in Tables + * D4-23 and D4-25 in ARM DDI 0487A.b. + */ +#ifdef CONFIG_ARM64_64K_PAGES +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 64kB pages (TG0 = 1) + * 2 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) +#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) +#else +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 4kB pages (TG0 = 0) + * 3 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) +#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) +#endif + +#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) +#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (48LLU) +#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) + +/* Hyp System Trap Register */ +#define HSTR_EL2_TTEE (1 << 16) +#define HSTR_EL2_T(x) (1 << x) + +/* Hyp Coprocessor Trap Register */ +#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TFP (1 << 10) + +/* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TDRA (1 << 11) +#define MDCR_EL2_TDOSA (1 << 10) +#define MDCR_EL2_TDA (1 << 9) +#define MDCR_EL2_TDE (1 << 8) +#define MDCR_EL2_HPME (1 << 7) +#define MDCR_EL2_TPM (1 << 6) +#define MDCR_EL2_TPMCR (1 << 5) +#define MDCR_EL2_HPMN_MASK (0x1F) + +/* Exception Syndrome Register (ESR) bits */ +#define ESR_EL2_EC_SHIFT (26) +#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_ISS (ESR_EL2_IL - 1) +#define ESR_EL2_ISV_SHIFT (24) +#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_SAS_SHIFT (22) +#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SSE (1 << 21) +#define ESR_EL2_SRT_SHIFT (16) +#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) +#define ESR_EL2_SF (1 << 15) +#define ESR_EL2_AR (1 << 14) +#define ESR_EL2_EA (1 << 9) +#define ESR_EL2_CM (1 << 8) +#define ESR_EL2_S1PTW (1 << 7) +#define ESR_EL2_WNR (1 << 6) +#define ESR_EL2_FSC (0x3f) +#define ESR_EL2_FSC_TYPE (0x3c) + +#define ESR_EL2_CV_SHIFT (24) +#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_COND_SHIFT (20) +#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) + + +#define FSC_FAULT (0x04) +#define FSC_PERM (0x0c) + +/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ +#define HPFAR_MASK (~0xFUL) + +#define ESR_EL2_EC_UNKNOWN (0x00) +#define ESR_EL2_EC_WFI (0x01) +#define ESR_EL2_EC_CP15_32 (0x03) +#define ESR_EL2_EC_CP15_64 (0x04) +#define ESR_EL2_EC_CP14_MR (0x05) +#define ESR_EL2_EC_CP14_LS (0x06) +#define ESR_EL2_EC_FP_ASIMD (0x07) +#define ESR_EL2_EC_CP10_ID (0x08) +#define ESR_EL2_EC_CP14_64 (0x0C) +#define ESR_EL2_EC_ILL_ISS (0x0E) +#define ESR_EL2_EC_SVC32 (0x11) +#define ESR_EL2_EC_HVC32 (0x12) +#define ESR_EL2_EC_SMC32 (0x13) +#define ESR_EL2_EC_SVC64 (0x15) +#define ESR_EL2_EC_HVC64 (0x16) +#define ESR_EL2_EC_SMC64 (0x17) +#define ESR_EL2_EC_SYS64 (0x18) +#define ESR_EL2_EC_IABT (0x20) +#define ESR_EL2_EC_IABT_HYP (0x21) +#define ESR_EL2_EC_PC_ALIGN (0x22) +#define ESR_EL2_EC_DABT (0x24) +#define ESR_EL2_EC_DABT_HYP (0x25) +#define ESR_EL2_EC_SP_ALIGN (0x26) +#define ESR_EL2_EC_FP_EXC32 (0x28) +#define ESR_EL2_EC_FP_EXC64 (0x2C) +#define ESR_EL2_EC_SERROR (0x2F) +#define ESR_EL2_EC_BREAKPT (0x30) +#define ESR_EL2_EC_BREAKPT_HYP (0x31) +#define ESR_EL2_EC_SOFTSTP (0x32) +#define ESR_EL2_EC_SOFTSTP_HYP (0x33) +#define ESR_EL2_EC_WATCHPT (0x34) +#define ESR_EL2_EC_WATCHPT_HYP (0x35) +#define ESR_EL2_EC_BKPT32 (0x38) +#define ESR_EL2_EC_VECTOR32 (0x3A) +#define ESR_EL2_EC_BRK64 (0x3C) + +#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 + +#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) + +#endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h new file mode 100644 index 000000000000..483842180f8f --- /dev/null +++ b/arch/arm64/include/asm/kvm_asm.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +#include <asm/virt.h> + +/* + * 0 is reserved as an invalid value. + * Order *must* be kept in sync with the hyp switch code. + */ +#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ +#define CSSELR_EL1 2 /* Cache Size Selection Register */ +#define SCTLR_EL1 3 /* System Control Register */ +#define ACTLR_EL1 4 /* Auxilliary Control Register */ +#define CPACR_EL1 5 /* Coprocessor Access Control */ +#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ +#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ +#define TCR_EL1 8 /* Translation Control Register */ +#define ESR_EL1 9 /* Exception Syndrome Register */ +#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ +#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ +#define FAR_EL1 12 /* Fault Address Register */ +#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ +#define VBAR_EL1 14 /* Vector Base Address Register */ +#define CONTEXTIDR_EL1 15 /* Context ID Register */ +#define TPIDR_EL0 16 /* Thread ID, User R/W */ +#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ +#define TPIDR_EL1 18 /* Thread ID, Privileged */ +#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ +#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +#define PAR_EL1 21 /* Physical Address Register */ +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ +#define DBGBCR15_EL1 38 +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ +#define DBGBVR15_EL1 54 +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ +#define DBGWCR15_EL1 70 +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ +#define DBGWVR15_EL1 86 +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ + +/* 32bit specific registers. Keep them at the end of the range */ +#define DACR32_EL2 88 /* Domain Access Control Register */ +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 94 + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + +#define NR_COPRO_REGS (NR_SYS_REGS * 2) + +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_TRAP 1 + +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) + +#ifndef __ASSEMBLY__ +struct kvm; +struct kvm_vcpu; + +extern char __kvm_hyp_init[]; +extern char __kvm_hyp_init_end[]; + +extern char __kvm_hyp_vector[]; + +#define __kvm_hyp_code_start __hyp_text_start +#define __kvm_hyp_code_end __hyp_text_end + +extern void __kvm_flush_vm_context(void); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +extern u64 __vgic_v3_get_ich_vtr_el2(void); + +extern char __save_vgic_v2_state[]; +extern char __restore_vgic_v2_state[]; +extern char __save_vgic_v3_state[]; +extern char __restore_vgic_v3_state[]; + +#endif + +#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..0b52377a6c11 --- /dev/null +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/asm/kvm_coproc.h + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_COPROC_H__ +#define __ARM64_KVM_COPROC_H__ + +#include <linux/kvm_host.h> + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +struct kvm_sys_reg_table { + const struct sys_reg_desc *table; + size_t num; +}; + +struct kvm_sys_reg_target_table { + struct kvm_sys_reg_table table64; + struct kvm_sys_reg_table table32; +}; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table); + +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#define kvm_coproc_table_init kvm_sys_reg_table_init +void kvm_sys_reg_table_init(void); + +struct kvm_one_reg; +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..5674a55b5518 --- /dev/null +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -0,0 +1,268 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/kvm_emulate.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_EMULATE_H__ +#define __ARM64_KVM_EMULATE_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmio.h> +#include <asm/ptrace.h> + +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); + +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); + +void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; +} + +static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; +} + +static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; +} + +static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +{ + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); +} + +static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return kvm_condition_valid32(vcpu); + + return true; +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; +} + +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; +} + +static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_reg32(vcpu, reg_num); + + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} + +/* Get vcpu SPSR for current mode */ +static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_spsr32(vcpu); + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + + if (vcpu_mode_is_32bit(vcpu)) + return mode > COMPAT_PSR_MODE_USR; + + return mode != PSR_MODE_EL0t; +} + +static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.esr_el2; +} + +static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.far_el2; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; +} + +static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); +} + +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); +} + +static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); +} + +static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); +} + +static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); +} + +static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +{ + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); +} + +static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; +} + +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu_sys_reg(vcpu, MPIDR_EL1); +} + +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; + else + vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); +} + +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); + + return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + case 4: + return be32_to_cpu(data & 0xffffffff); + default: + return be64_to_cpu(data); + } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + case 4: + return le32_to_cpu(data & 0xffffffff); + default: + return le64_to_cpu(data); + } + } + + return data; /* Leave LE untouched */ +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + case 4: + return cpu_to_be32(data & 0xffffffff); + default: + return cpu_to_be64(data); + } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + case 4: + return cpu_to_le32(data & 0xffffffff); + default: + return cpu_to_le64(data); + } + } + + return data; /* Leave LE untouched */ +} + +#endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h new file mode 100644 index 000000000000..bcde41905746 --- /dev/null +++ b/arch/arm64/include/asm/kvm_host.h @@ -0,0 +1,251 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/asm/kvm_host.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_HOST_H__ +#define __ARM64_KVM_HOST_H__ + +#include <linux/types.h> +#include <linux/kvm_types.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmio.h> + +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + +#define KVM_USER_MEM_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#include <kvm/arm_vgic.h> +#include <kvm/arm_arch_timer.h> + +#define KVM_VCPU_MAX_FEATURES 3 + +int __attribute_const__ kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +int kvm_arch_dev_ioctl_check_extension(long ext); + +struct kvm_arch { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* 1-level 2nd stage table and lock */ + spinlock_t pgd_lock; + pgd_t *pgd; + + /* VTTBR value associated with above pgd and vmid */ + u64 vttbr; + + /* Interrupt controller */ + struct vgic_dist vgic; + + /* Timer */ + struct arch_timer_kvm timer; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_fault_info { + u32 esr_el2; /* Hyp Syndrom Register */ + u64 far_el2; /* Hyp Fault Address Register */ + u64 hpfar_el2; /* Hyp IPA Fault Address Register */ +}; + +struct kvm_cpu_context { + struct kvm_regs gp_regs; + union { + u64 sys_regs[NR_SYS_REGS]; + u32 copro[NR_COPRO_REGS]; + }; +}; + +typedef struct kvm_cpu_context kvm_cpu_context_t; + +struct kvm_vcpu_arch { + struct kvm_cpu_context ctxt; + + /* HYP configuration */ + u64 hcr_el2; + + /* Exception Information */ + struct kvm_vcpu_fault_info fault; + + /* Debug state */ + u64 debug_flags; + + /* Pointer to host CPU context */ + kvm_cpu_context_t *host_cpu_context; + + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + /* dcache set/way operation pending */ + int last_pcpu; + cpumask_t require_dcache_flush; + + /* Don't run the guest */ + bool pause; + + /* IO related fields */ + struct kvm_decode mmio_decode; + + /* Interrupt related fields */ + u64 irq_lines; /* IRQ and FIQ levels */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + + /* Target CPU and feature flags */ + int target; + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* Detect first run of a vcpu */ + bool has_run_once; +}; + +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +/* + * CP14 and CP15 live in the same array, as they are backed by the + * same system registers. + */ +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) +#else +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) +#endif + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + +/* We do not have shadow page tables, hence the empty hooks */ +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +struct kvm_vcpu *kvm_arm_get_running_vcpu(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); + +u64 kvm_call_hyp(void *hypfn, ...); + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + /* + * Call initialization code, and switch to the full blown + * HYP code. + */ + kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); +} + +struct vgic_sr_vectors { + void *save_vgic; + void *restore_vgic; +}; + +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + extern struct vgic_sr_vectors __vgic_sr_vectors; + + switch(vgic->type) + { + case VGIC_V2: + __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; + break; + +#ifdef CONFIG_ARM_GIC_V3 + case VGIC_V3: + __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; + break; +#endif + + default: + BUG(); + } +} + +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + +#endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h new file mode 100644 index 000000000000..fc2f689c0694 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_MMIO_H__ +#define __ARM64_KVM_MMIO_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> + +/* + * This is annoying. The mmio code requires this, even if we don't + * need any decoding. To be fixed. + */ +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +/* + * The in-kernel MMIO emulation code wants to use a copy of run->mmio, + * which is an anonymous type. Use our own type instead. + */ +struct kvm_exit_mmio { + phys_addr_t phys_addr; + u8 data[8]; + u32 len; + bool is_write; +}; + +static inline void kvm_prepare_mmio(struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + run->mmio.phys_addr = mmio->phys_addr; + run->mmio.len = mmio->len; + run->mmio.is_write = mmio->is_write; + memcpy(run->mmio.data, mmio->data, mmio->len); + run->exit_reason = KVM_EXIT_MMIO; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + +#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h new file mode 100644 index 000000000000..a030d163840b --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_MMU_H__ +#define __ARM64_KVM_MMU_H__ + +#include <asm/page.h> +#include <asm/memory.h> + +/* + * As we only have the TTBR0_EL2 register, we cannot express + * "negative" addresses. This makes it impossible to directly share + * mappings with the kernel. + * + * Instead, give the HYP mode its own VA region at a fixed offset from + * the kernel by just masking the top bits (which are all ones for a + * kernel address). + */ +#define HYP_PAGE_OFFSET_SHIFT VA_BITS +#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) + +/* + * Our virtual mapping for the idmap-ed MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the last + * possible page, where no kernel mapping will ever exist. + */ +#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) + +#ifdef __ASSEMBLY__ + +/* + * Convert a kernel VA into a HYP VA. + * reg: VA to be converted. + */ +.macro kern_hyp_va reg + and \reg, \reg, #HYP_PAGE_OFFSET_MASK +.endm + +#else + +#include <asm/cachetype.h> +#include <asm/cacheflush.h> + +#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) + +/* + * We currently only support a 40bit IPA. + */ +#define KVM_PHYS_SHIFT (40) +#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) + +/* Make sure we get the right size, and thus the right alignment */ +#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT)) +#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + +int create_hyp_mappings(void *from, void *to); +int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); + +int kvm_alloc_stage2_pgd(struct kvm *kvm); +void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size); + +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); + +phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); +int kvm_mmu_init(void); +void kvm_clear_hyp_idmap(void); + +#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) +#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) + +static inline void kvm_clean_pgd(pgd_t *pgd) {} +static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} +static inline void kvm_clean_pte(pte_t *pte) {} +static inline void kvm_clean_pte_entry(pte_t *pte) {} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= PTE_S2_RDWR; +} + +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= PMD_S2_RDWR; +} + +#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) +#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) +#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) + +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifndef CONFIG_ARM64_64K_PAGES +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#else +#define kvm_pmd_table_empty(pmdp) (0) +#endif +#define kvm_pud_table_empty(pudp) (0) + + +struct kvm; + +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) +{ + return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; +} + +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) +{ + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + + if (!icache_is_aliasing()) { /* PIPT */ + flush_icache_range(hva, hva + size); + } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + +#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) + +void stage2_flush_vm(struct kvm *kvm); + +#endif /* __ASSEMBLY__ */ +#endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h new file mode 100644 index 000000000000..bc39e557c56c --- /dev/null +++ b/arch/arm64/include/asm/kvm_psci.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_PSCI_H__ +#define __ARM64_KVM_PSCI_H__ + +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2e9d83673ef6..f7af66b54cb2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -85,6 +85,8 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ @@ -98,9 +100,9 @@ #define PTE_HYP PTE_USER /* - * 40-bit physical address supported. + * Highest possible physical address supported. */ -#define PHYS_MASK_SHIFT (40) +#define PHYS_MASK_SHIFT (48) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* @@ -120,7 +122,6 @@ #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) #define TCR_TG0_64K (UL(1) << 14) #define TCR_TG1_64K (UL(1) << 30) -#define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 215ad4649dd7..7a5df5252dd7 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..8e38878c87c6 --- /dev/null +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/uapi/asm/kvm.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#define KVM_SPSR_EL1 0 +#define KVM_SPSR_SVC KVM_SPSR_EL1 +#define KVM_SPSR_ABT 1 +#define KVM_SPSR_UND 2 +#define KVM_SPSR_IRQ 3 +#define KVM_SPSR_FIQ 4 +#define KVM_NR_SPSR 5 + +#ifndef __ASSEMBLY__ +#include <linux/psci.h> +#include <asm/types.h> +#include <asm/ptrace.h> + +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +struct kvm_regs { + struct user_pt_regs regs; /* sp = sp_el0 */ + + __u64 sp_el1; + __u64 elr_el1; + + __u64 spsr[KVM_NR_SPSR]; + + struct user_fpsimd_state fp_regs; +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_AEM_V8 0 +#define KVM_ARM_TARGET_FOUNDATION_V8 1 +#define KVM_ARM_TARGET_CORTEX_A57 2 +#define KVM_ARM_TARGET_XGENE_POTENZA 3 +#define KVM_ARM_TARGET_CORTEX_A53 4 + +#define KVM_ARM_NUM_TARGETS 5 + +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ +#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32)) + +/* Some registers need more space to represent values. */ +#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00 +#define KVM_REG_ARM_DEMUX_ID_SHIFT 8 +#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT) +#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF +#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0 + +/* AArch64 system registers */ +#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 + +#define ARM64_SYS_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \ + KVM_REG_ARM64_SYSREG_ ## n ## _MASK) + +#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \ + ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) + +#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) +#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) + +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED + +#endif + +#endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c481a119b98a..9a9fce090d58 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/kvm_host.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/cputable.h> @@ -119,6 +120,7 @@ int main(void) DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); @@ -128,13 +130,24 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); + DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); + DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); + DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); + DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); + DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); + DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); + DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); + DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); + DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7f66fe150265..b33051d501e6 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -31,15 +31,6 @@ #include <asm/cputype.h> #include <asm/system_misc.h> -/* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) -#define DBG_SPSR_SS (1 << 21) - -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - /* Determine debug architecture. */ u8 debug_monitors_arch(void) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ce2d97255ba9..55d0e035205f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,6 +17,19 @@ ENTRY(stext) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -48,6 +61,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -102,3 +116,9 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig new file mode 100644 index 000000000000..8ba85e9ea388 --- /dev/null +++ b/arch/arm64/kvm/Kconfig @@ -0,0 +1,63 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select MMU_NOTIFIER + select PREEMPT_NOTIFIERS + select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_MMIO + select KVM_ARM_HOST + select KVM_ARM_VGIC + select KVM_ARM_TIMER + ---help--- + Support hosting virtualized guest machines. + + If unsure, say N. + +config KVM_ARM_HOST + bool + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + +config KVM_ARM_VGIC + bool + depends on KVM_ARM_HOST && OF + select HAVE_KVM_IRQCHIP + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER + bool + depends on KVM_ARM_VGIC + ---help--- + Adds support for the Architected Timers in virtual machines. + +endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile new file mode 100644 index 000000000000..32a096174b94 --- /dev/null +++ b/arch/arm64/kvm/Makefile @@ -0,0 +1,27 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +CFLAGS_arm.o := -I. +CFLAGS_mmu.o := -I. + +KVM=../../../virt/kvm +ARM=../../../arch/arm/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += kvm.o + +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o + +kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o + +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o +kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c new file mode 100644 index 000000000000..124418d17049 --- /dev/null +++ b/arch/arm64/kvm/emulate.c @@ -0,0 +1,158 @@ +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr & ESR_EL2_CV) + return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + + return -1; +} + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); + + BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK)); + + if (!(cpsr & COMPAT_PSR_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~COMPAT_PSR_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c new file mode 100644 index 000000000000..76794692c20b --- /dev/null +++ b/arch/arm64/kvm/guest.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/guest.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <asm/cputype.h> +#include <asm/uaccess.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* + * Because the kvm_regs structure is a mix of 32, 64 and + * 128bit fields, we index it as if it was a 32bit + * array. Hence below, nr_regs is the number of entries, and + * off the index in the "array". + */ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + u32 off; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + __uint128_t tmp; + void *valp = &tmp; + u64 off; + int err = 0; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) { + err = -EFAULT; + goto out; + } + + if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { + u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_USR: + case COMPAT_PSR_MODE_FIQ: + case COMPAT_PSR_MODE_IRQ: + case COMPAT_PSR_MODE_SVC: + case COMPAT_PSR_MODE_ABT: + case COMPAT_PSR_MODE_UND: + case PSR_MODE_EL0t: + case PSR_MODE_EL1t: + case PSR_MODE_EL1h: + break; + default: + err = -EINVAL; + goto out; + } + } + + memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); +out: + return err; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(__u32); +} + +/** + * ARM64 versions of the TIMER registers, always available on arm64 + */ + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return -EFAULT; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) + + NUM_TIMER_REGS; +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend system regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + int ret; + + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + + return kvm_arm_sys_reg_get_reg(vcpu, reg); +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + + return kvm_arm_sys_reg_set_reg(vcpu, reg); +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + switch (implementor) { + case ARM_CPU_IMP_ARM: + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A53: + return KVM_ARM_TARGET_CORTEX_A53; + case ARM_CPU_PART_CORTEX_A57: + return KVM_ARM_TARGET_CORTEX_A57; + }; + break; + case ARM_CPU_IMP_APM: + switch (part_number) { + case APM_CPU_PART_POTENZA: + return KVM_ARM_TARGET_XGENE_POTENZA; + }; + break; + }; + + return -EINVAL; +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + vcpu->arch.target = phys_target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (init->features[i / 32] & (1 << (i % 32))) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c new file mode 100644 index 000000000000..e28be510380c --- /dev/null +++ b/arch/arm64/kvm/handle_exit.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/handle_exit.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_psci.h> + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int ret; + + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); + return 1; + } + + return ret; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +/** + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event + * instruction executed by a guest + * + * @vcpu: the vcpu pointer + * + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [ESR_EL2_EC_WFI] = kvm_handle_wfx, + [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, + [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, + [ESR_EL2_EC_HVC32] = handle_hvc, + [ESR_EL2_EC_SMC32] = handle_smc, + [ESR_EL2_EC_HVC64] = handle_hvc, + [ESR_EL2_EC_SMC64] = handle_smc, + [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, + [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, + [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unknown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_TRAP: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S new file mode 100644 index 000000000000..d968796f4b2d --- /dev/null +++ b/arch/arm64/kvm/hyp-init.S @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +ENTRY(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP boot pgd + * x1: HYP pgd + * x2: HYP stack + * x3: HYP vectors + */ +__do_hyp_init: + + msr ttbr0_el2, x0 + + mrs x4, tcr_el1 + ldr x5, =TCR_EL2_MASK + and x4, x4, x5 + ldr x5, =TCR_EL2_FLAGS + orr x4, x4, x5 + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR0_EL1 + bfi x4, x5, #16, #3 + msr vtcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + mrs x4, sctlr_el2 + and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 + ldr x5, =SCTLR_EL2_FLAGS + orr x4, x4, x5 + msr sctlr_el2, x4 + isb + + /* MMU is now enabled. Get ready for the trampoline dance */ + ldr x4, =TRAMPOLINE_VA + adr x5, target + bfi x4, x5, #0, #PAGE_SHIFT + br x4 + +target: /* We're now in the trampoline code, switch page tables */ + msr ttbr0_el2, x1 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Set the stack and new vectors */ + kern_hyp_va x2 + mov sp, x2 + kern_hyp_va x3 + msr vbar_el2, x3 + + /* Hello, World! */ + eret +ENDPROC(__kvm_hyp_init) + + .ltorg + + .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S new file mode 100644 index 000000000000..b72aa9f9215c --- /dev/null +++ b/arch/arm64/kvm/hyp.S @@ -0,0 +1,1274 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/memory.h> +#include <asm/asm-offsets.h> +#include <asm/debug-monitors.h> +#include <asm/fpsimdmacros.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + .align PAGE_SHIFT + +.macro save_common_regs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_XREG_OFFSET(19) + stp x19, x20, [x3] + stp x21, x22, [x3, #16] + stp x23, x24, [x3, #32] + stp x25, x26, [x3, #48] + stp x27, x28, [x3, #64] + stp x29, lr, [x3, #80] + + mrs x19, sp_el0 + mrs x20, elr_el2 // EL1 PC + mrs x21, spsr_el2 // EL1 pstate + + stp x19, x20, [x3, #96] + str x21, [x3, #112] + + mrs x22, sp_el1 + mrs x23, elr_el1 + mrs x24, spsr_el1 + + str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] +.endm + +.macro restore_common_regs + // x2: base address for cpu context + // x3: tmp register + + ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] + + msr sp_el1, x22 + msr elr_el1, x23 + msr spsr_el1, x24 + + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 + ldp x19, x20, [x3] + ldr x21, [x3, #16] + + msr sp_el0, x19 + msr elr_el2, x20 // EL1 PC + msr spsr_el2, x21 // EL1 pstate + + add x3, x2, #CPU_XREG_OFFSET(19) + ldp x19, x20, [x3] + ldp x21, x22, [x3, #16] + ldp x23, x24, [x3, #32] + ldp x25, x26, [x3, #48] + ldp x27, x28, [x3, #64] + ldp x29, lr, [x3, #80] +.endm + +.macro save_host_regs + save_common_regs +.endm + +.macro restore_host_regs + restore_common_regs +.endm + +.macro save_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_save x3, 4 +.endm + +.macro restore_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_restore x3, 4 +.endm + +.macro save_guest_regs + // x0 is the vcpu address + // x1 is the return code, do not corrupt! + // x2 is the cpu context + // x3 is a tmp register + // Guest's x0-x3 are on the stack + + // Compute base to save registers + add x3, x2, #CPU_XREG_OFFSET(4) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + str x18, [x3, #112] + + pop x6, x7 // x2, x3 + pop x4, x5 // x0, x1 + + add x3, x2, #CPU_XREG_OFFSET(0) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + save_common_regs +.endm + +.macro restore_guest_regs + // x0 is the vcpu address. + // x2 is the cpu context + // x3 is a tmp register + + // Prepare x0-x3 for later restore + add x3, x2, #CPU_XREG_OFFSET(0) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + push x4, x5 // Push x0-x3 on the stack + push x6, x7 + + // x4-x18 + ldp x4, x5, [x3, #32] + ldp x6, x7, [x3, #48] + ldp x8, x9, [x3, #64] + ldp x10, x11, [x3, #80] + ldp x12, x13, [x3, #96] + ldp x14, x15, [x3, #112] + ldp x16, x17, [x3, #128] + ldr x18, [x3, #144] + + // x19-x29, lr, sp*, elr*, spsr* + restore_common_regs + + // Last bits of the 64bit state + pop x2, x3 + pop x0, x1 + + // Do not touch any register after this! +.endm + +/* + * Macros to perform system register save/restore. + * + * Ordering here is absolutely critical, and must be kept consistent + * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, + * and in kvm_asm.h. + * + * In other words, don't touch any of these unless you know what + * you are doing. + */ +.macro save_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + mrs x4, vmpidr_el2 + mrs x5, csselr_el1 + mrs x6, sctlr_el1 + mrs x7, actlr_el1 + mrs x8, cpacr_el1 + mrs x9, ttbr0_el1 + mrs x10, ttbr1_el1 + mrs x11, tcr_el1 + mrs x12, esr_el1 + mrs x13, afsr0_el1 + mrs x14, afsr1_el1 + mrs x15, far_el1 + mrs x16, mair_el1 + mrs x17, vbar_el1 + mrs x18, contextidr_el1 + mrs x19, tpidr_el0 + mrs x20, tpidrro_el0 + mrs x21, tpidr_el1 + mrs x22, amair_el1 + mrs x23, cntkctl_el1 + mrs x24, par_el1 + mrs x25, mdscr_el1 + + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + stp x18, x19, [x3, #112] + stp x20, x21, [x3, #128] + stp x22, x23, [x3, #144] + stp x24, x25, [x3, #160] +.endm + +.macro save_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbcr15_el1 + mrs x19, dbgbcr14_el1 + mrs x18, dbgbcr13_el1 + mrs x17, dbgbcr12_el1 + mrs x16, dbgbcr11_el1 + mrs x15, dbgbcr10_el1 + mrs x14, dbgbcr9_el1 + mrs x13, dbgbcr8_el1 + mrs x12, dbgbcr7_el1 + mrs x11, dbgbcr6_el1 + mrs x10, dbgbcr5_el1 + mrs x9, dbgbcr4_el1 + mrs x8, dbgbcr3_el1 + mrs x7, dbgbcr2_el1 + mrs x6, dbgbcr1_el1 + mrs x5, dbgbcr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbvr15_el1 + mrs x19, dbgbvr14_el1 + mrs x18, dbgbvr13_el1 + mrs x17, dbgbvr12_el1 + mrs x16, dbgbvr11_el1 + mrs x15, dbgbvr10_el1 + mrs x14, dbgbvr9_el1 + mrs x13, dbgbvr8_el1 + mrs x12, dbgbvr7_el1 + mrs x11, dbgbvr6_el1 + mrs x10, dbgbvr5_el1 + mrs x9, dbgbvr4_el1 + mrs x8, dbgbvr3_el1 + mrs x7, dbgbvr2_el1 + mrs x6, dbgbvr1_el1 + mrs x5, dbgbvr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwcr15_el1 + mrs x19, dbgwcr14_el1 + mrs x18, dbgwcr13_el1 + mrs x17, dbgwcr12_el1 + mrs x16, dbgwcr11_el1 + mrs x15, dbgwcr10_el1 + mrs x14, dbgwcr9_el1 + mrs x13, dbgwcr8_el1 + mrs x12, dbgwcr7_el1 + mrs x11, dbgwcr6_el1 + mrs x10, dbgwcr5_el1 + mrs x9, dbgwcr4_el1 + mrs x8, dbgwcr3_el1 + mrs x7, dbgwcr2_el1 + mrs x6, dbgwcr1_el1 + mrs x5, dbgwcr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwvr15_el1 + mrs x19, dbgwvr14_el1 + mrs x18, dbgwvr13_el1 + mrs x17, dbgwvr12_el1 + mrs x16, dbgwvr11_el1 + mrs x15, dbgwvr10_el1 + mrs x14, dbgwvr9_el1 + mrs x13, dbgwvr8_el1 + mrs x12, dbgwvr7_el1 + mrs x11, dbgwvr6_el1 + mrs x10, dbgwvr5_el1 + mrs x9, dbgwvr4_el1 + mrs x8, dbgwvr3_el1 + mrs x7, dbgwvr2_el1 + mrs x6, dbgwvr1_el1 + mrs x5, dbgwvr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] +.endm + +.macro restore_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + ldp x8, x9, [x3, #32] + ldp x10, x11, [x3, #48] + ldp x12, x13, [x3, #64] + ldp x14, x15, [x3, #80] + ldp x16, x17, [x3, #96] + ldp x18, x19, [x3, #112] + ldp x20, x21, [x3, #128] + ldp x22, x23, [x3, #144] + ldp x24, x25, [x3, #160] + + msr vmpidr_el2, x4 + msr csselr_el1, x5 + msr sctlr_el1, x6 + msr actlr_el1, x7 + msr cpacr_el1, x8 + msr ttbr0_el1, x9 + msr ttbr1_el1, x10 + msr tcr_el1, x11 + msr esr_el1, x12 + msr afsr0_el1, x13 + msr afsr1_el1, x14 + msr far_el1, x15 + msr mair_el1, x16 + msr vbar_el1, x17 + msr contextidr_el1, x18 + msr tpidr_el0, x19 + msr tpidrro_el0, x20 + msr tpidr_el1, x21 + msr amair_el1, x22 + msr cntkctl_el1, x23 + msr par_el1, x24 + msr mdscr_el1, x25 +.endm + +.macro restore_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbcr15_el1, x20 + msr dbgbcr14_el1, x19 + msr dbgbcr13_el1, x18 + msr dbgbcr12_el1, x17 + msr dbgbcr11_el1, x16 + msr dbgbcr10_el1, x15 + msr dbgbcr9_el1, x14 + msr dbgbcr8_el1, x13 + msr dbgbcr7_el1, x12 + msr dbgbcr6_el1, x11 + msr dbgbcr5_el1, x10 + msr dbgbcr4_el1, x9 + msr dbgbcr3_el1, x8 + msr dbgbcr2_el1, x7 + msr dbgbcr1_el1, x6 + msr dbgbcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbvr15_el1, x20 + msr dbgbvr14_el1, x19 + msr dbgbvr13_el1, x18 + msr dbgbvr12_el1, x17 + msr dbgbvr11_el1, x16 + msr dbgbvr10_el1, x15 + msr dbgbvr9_el1, x14 + msr dbgbvr8_el1, x13 + msr dbgbvr7_el1, x12 + msr dbgbvr6_el1, x11 + msr dbgbvr5_el1, x10 + msr dbgbvr4_el1, x9 + msr dbgbvr3_el1, x8 + msr dbgbvr2_el1, x7 + msr dbgbvr1_el1, x6 + msr dbgbvr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwcr15_el1, x20 + msr dbgwcr14_el1, x19 + msr dbgwcr13_el1, x18 + msr dbgwcr12_el1, x17 + msr dbgwcr11_el1, x16 + msr dbgwcr10_el1, x15 + msr dbgwcr9_el1, x14 + msr dbgwcr8_el1, x13 + msr dbgwcr7_el1, x12 + msr dbgwcr6_el1, x11 + msr dbgwcr5_el1, x10 + msr dbgwcr4_el1, x9 + msr dbgwcr3_el1, x8 + msr dbgwcr2_el1, x7 + msr dbgwcr1_el1, x6 + msr dbgwcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwvr15_el1, x20 + msr dbgwvr14_el1, x19 + msr dbgwvr13_el1, x18 + msr dbgwvr12_el1, x17 + msr dbgwvr11_el1, x16 + msr dbgwvr10_el1, x15 + msr dbgwvr9_el1, x14 + msr dbgwvr8_el1, x13 + msr dbgwvr7_el1, x12 + msr dbgwvr6_el1, x11 + msr dbgwvr5_el1, x10 + msr dbgwvr4_el1, x9 + msr dbgwvr3_el1, x8 + msr dbgwvr2_el1, x7 + msr dbgwvr1_el1, x6 + msr dbgwvr0_el1, x5 + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 +.endm + +.macro skip_32bit_state tmp, target + // Skip 32bit state if not needed + mrs \tmp, hcr_el2 + tbnz \tmp, #HCR_RW_SHIFT, \target +.endm + +.macro skip_tee_state tmp, target + // Skip ThumbEE state if not needed + mrs \tmp, id_pfr0_el1 + tbz \tmp, #12, \target +.endm + +.macro skip_debug_state tmp, target + ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] + tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target +.endm + +.macro compute_debug_state target + // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY + // is set, we do a full save/restore cycle and disable trapping. + add x25, x0, #VCPU_CONTEXT + + // Check the state of MDSCR_EL1 + ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] + and x26, x25, #DBG_MDSCR_KDE + and x25, x25, #DBG_MDSCR_MDE + adds xzr, x25, x26 + b.eq 9998f // Nothing to see there + + // If any interesting bits was set, we must set the flag + mov x26, #KVM_ARM64_DEBUG_DIRTY + str x26, [x0, #VCPU_DEBUG_FLAGS] + b 9999f // Don't skip restore + +9998: + // Otherwise load the flags from memory in case we recently + // trapped + skip_debug_state x25, \target +9999: +.endm + +.macro save_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + mrs x4, spsr_abt + mrs x5, spsr_und + mrs x6, spsr_irq + mrs x7, spsr_fiq + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + mrs x4, dacr32_el2 + mrs x5, ifsr32_el2 + mrs x6, fpexc32_el2 + stp x4, x5, [x3] + str x6, [x3, #16] + + skip_debug_state x8, 2f + mrs x7, dbgvcr32_el2 + str x7, [x3, #24] +2: + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + mrs x4, teecr32_el1 + mrs x5, teehbr32_el1 + stp x4, x5, [x3] +1: +.endm + +.macro restore_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr spsr_abt, x4 + msr spsr_und, x5 + msr spsr_irq, x6 + msr spsr_fiq, x7 + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + ldp x4, x5, [x3] + ldr x6, [x3, #16] + msr dacr32_el2, x4 + msr ifsr32_el2, x5 + msr fpexc32_el2, x6 + + skip_debug_state x8, 2f + ldr x7, [x3, #24] + msr dbgvcr32_el2, x7 +2: + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + ldp x4, x5, [x3] + msr teecr32_el1, x4 + msr teehbr32_el1, x5 +1: +.endm + +.macro activate_traps + ldr x2, [x0, #VCPU_HCR_EL2] + msr hcr_el2, x2 + ldr x2, =(CPTR_EL2_TTA) + msr cptr_el2, x2 + + ldr x2, =(1 << 15) // Trap CP15 Cr=15 + msr hstr_el2, x2 + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) + + // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap + // if not dirty. + ldr x3, [x0, #VCPU_DEBUG_FLAGS] + tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f + orr x2, x2, #MDCR_EL2_TDA +1: + msr mdcr_el2, x2 +.endm + +.macro deactivate_traps + mov x2, #HCR_RW + msr hcr_el2, x2 + msr cptr_el2, xzr + msr hstr_el2, xzr + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + msr mdcr_el2, x2 +.endm + +.macro activate_vm + ldr x1, [x0, #VCPU_KVM] + kern_hyp_va x1 + ldr x2, [x1, #KVM_VTTBR] + msr vttbr_el2, x2 +.endm + +.macro deactivate_vm + msr vttbr_el2, xzr +.endm + +/* + * Call into the vgic backend for state saving + */ +.macro save_vgic_state + adr x24, __vgic_sr_vectors + ldr x24, [x24, VGIC_SAVE_FN] + kern_hyp_va x24 + blr x24 + mrs x24, hcr_el2 + mov x25, #HCR_INT_OVERRIDE + neg x25, x25 + and x24, x24, x25 + msr hcr_el2, x24 +.endm + +/* + * Call into the vgic backend for state restoring + */ +.macro restore_vgic_state + mrs x24, hcr_el2 + ldr x25, [x0, #VCPU_IRQ_LINES] + orr x24, x24, #HCR_INT_OVERRIDE + orr x24, x24, x25 + msr hcr_el2, x24 + adr x24, __vgic_sr_vectors + ldr x24, [x24, #VGIC_RESTORE_FN] + kern_hyp_va x24 + blr x24 +.endm + +.macro save_timer_state + // x0: vcpu pointer + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + mrs x3, cntv_ctl_el0 + and x3, x3, #3 + str w3, [x0, #VCPU_TIMER_CNTV_CTL] + bic x3, x3, #1 // Clear Enable + msr cntv_ctl_el0, x3 + + isb + + mrs x3, cntv_cval_el0 + str x3, [x0, #VCPU_TIMER_CNTV_CVAL] + +1: + // Allow physical timer/counter access for the host + mrs x2, cnthctl_el2 + orr x2, x2, #3 + msr cnthctl_el2, x2 + + // Clear cntvoff for the host + msr cntvoff_el2, xzr +.endm + +.macro restore_timer_state + // x0: vcpu pointer + // Disallow physical timer access for the guest + // Physical counter access is allowed + mrs x2, cnthctl_el2 + orr x2, x2, #1 + bic x2, x2, #2 + msr cnthctl_el2, x2 + + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + ldr x3, [x2, #KVM_TIMER_CNTVOFF] + msr cntvoff_el2, x3 + ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] + msr cntv_cval_el0, x2 + isb + + ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] + and x2, x2, #3 + msr cntv_ctl_el0, x2 +1: +.endm + +__save_sysregs: + save_sysregs + ret + +__restore_sysregs: + restore_sysregs + ret + +__save_debug: + save_debug + ret + +__restore_debug: + restore_debug + ret + +__save_fpsimd: + save_fpsimd + ret + +__restore_fpsimd: + restore_fpsimd + ret + +/* + * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); + * + * This is the world switch. The first half of the function + * deals with entering the guest, and anything from __kvm_vcpu_return + * to the end of the function deals with reentering the host. + * On the enter path, only x0 (vcpu pointer) must be preserved until + * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception + * code) must both be preserved until the epilogue. + * In both cases, x2 points to the CPU context we're saving/restoring from/to. + */ +ENTRY(__kvm_vcpu_run) + kern_hyp_va x0 + msr tpidr_el2, x0 // Save the vcpu register + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + save_host_regs + bl __save_fpsimd + bl __save_sysregs + + compute_debug_state 1f + bl __save_debug +1: + activate_traps + activate_vm + + restore_vgic_state + restore_timer_state + + // Guest context + add x2, x0, #VCPU_CONTEXT + + bl __restore_sysregs + bl __restore_fpsimd + + skip_debug_state x3, 1f + bl __restore_debug +1: + restore_guest_32bit_state + restore_guest_regs + + // That's it, no more messing around. + eret + +__kvm_vcpu_return: + // Assume x0 is the vcpu pointer, x1 the return code + // Guest's x0-x3 are on the stack + + // Guest context + add x2, x0, #VCPU_CONTEXT + + save_guest_regs + bl __save_fpsimd + bl __save_sysregs + + skip_debug_state x3, 1f + bl __save_debug +1: + save_guest_32bit_state + + save_timer_state + save_vgic_state + + deactivate_traps + deactivate_vm + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + bl __restore_fpsimd + + skip_debug_state x3, 1f + // Clear the dirty flag for the next run, as all the state has + // already been saved. Note that we nuke the whole 64bit word. + // If we ever add more flags, we'll have to be more careful... + str xzr, [x0, #VCPU_DEBUG_FLAGS] + bl __restore_debug +1: + restore_host_regs + + mov x0, x1 + ret +END(__kvm_vcpu_run) + +// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +ENTRY(__kvm_tlb_flush_vmid_ipa) + dsb ishst + + kern_hyp_va x0 + ldr x2, [x0, #KVM_VTTBR] + msr vttbr_el2, x2 + isb + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + tlbi ipas2e1is, x1 + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb ish + tlbi vmalle1is + dsb ish + isb + + msr vttbr_el2, xzr + ret +ENDPROC(__kvm_tlb_flush_vmid_ipa) + +ENTRY(__kvm_flush_vm_context) + dsb ishst + tlbi alle1is + ic ialluis + dsb ish + ret +ENDPROC(__kvm_flush_vm_context) + + // struct vgic_sr_vectors __vgi_sr_vectors; + .align 3 +ENTRY(__vgic_sr_vectors) + .skip VGIC_SR_VECTOR_SZ +ENDPROC(__vgic_sr_vectors) + +__kvm_hyp_panic: + // Guess the context by looking at VTTBR: + // If zero, then we're already a host. + // Otherwise restore a minimal host context before panicing. + mrs x0, vttbr_el2 + cbz x0, 1f + + mrs x0, tpidr_el2 + + deactivate_traps + deactivate_vm + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + +1: adr x0, __hyp_panic_str + adr x1, 2f + ldp x2, x3, [x1] + sub x0, x0, x2 + add x0, x0, x3 + mrs x1, spsr_el2 + mrs x2, elr_el2 + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + mrs x6, par_el1 + mrs x7, tpidr_el2 + + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret + + .align 3 +2: .quad HYP_PAGE_OFFSET + .quad PAGE_OFFSET +ENDPROC(__kvm_hyp_panic) + +__hyp_panic_str: + .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + + .align 2 + +/* + * u64 kvm_call_hyp(void *hypfn, ...); + * + * This is not really a variadic function in the classic C-way and care must + * be taken when calling this to ensure parameters are passed in registers + * only, since the stack will change between the caller and the callee. + * + * Call the function with the first argument containing a pointer to the + * function you wish to call in Hyp mode, and subsequent arguments will be + * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the + * function pointer can be passed). The function being called must be mapped + * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are + * passed in r0 and r1. + * + * A function pointer with a value of 0 has a special meaning, and is + * used to implement __hyp_get_vectors in the same way as in + * arch/arm64/kernel/hyp_stub.S. + */ +ENTRY(kvm_call_hyp) + hvc #0 + ret +ENDPROC(kvm_call_hyp) + +.macro invalid_vector label, target + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid, __kvm_hyp_panic + invalid_vector el2t_irq_invalid, __kvm_hyp_panic + invalid_vector el2t_fiq_invalid, __kvm_hyp_panic + invalid_vector el2t_error_invalid, __kvm_hyp_panic + invalid_vector el2h_sync_invalid, __kvm_hyp_panic + invalid_vector el2h_irq_invalid, __kvm_hyp_panic + invalid_vector el2h_fiq_invalid, __kvm_hyp_panic + invalid_vector el2h_error_invalid, __kvm_hyp_panic + invalid_vector el1_sync_invalid, __kvm_hyp_panic + invalid_vector el1_irq_invalid, __kvm_hyp_panic + invalid_vector el1_fiq_invalid, __kvm_hyp_panic + invalid_vector el1_error_invalid, __kvm_hyp_panic + +el1_sync: // Guest trapped into EL2 + push x0, x1 + push x2, x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_EL2_EC_SHIFT + + cmp x2, #ESR_EL2_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + pop x2, x3 + pop x0, x1 + + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: push lr, xzr + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + pop lr, xzr +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + cmp x2, #ESR_EL2_EC_DABT + mov x0, #ESR_EL2_EC_IABT + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ + and x2, x1, #ESR_EL2_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + push x3, xzr + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + pop x0, xzr // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __kvm_vcpu_return + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: pop x2, x3 + pop x0, x1 + + eret + +el1_irq: + push x0, x1 + push x2, x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __kvm_vcpu_return + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) + + .popsection diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c new file mode 100644 index 000000000000..81a02a8762b0 --- /dev/null +++ b/arch/arm64/kvm/inject_fault.c @@ -0,0 +1,203 @@ +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/esr.h> + +#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ + PSR_I_BIT | PSR_D_BIT) +#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = (is_thumb) ? 4 : 0; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + +static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_aarch32; + u32 esr = 0; + + is_aarch32 = vcpu_mode_is_32bit(vcpu); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + vcpu_sys_reg(vcpu, FAR_EL1) = addr; + + /* + * Build an {i,d}abort, depending on the level and the + * instruction set. Report an external synchronous abort. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + /* + * Here, the guest runs in AArch64 mode when in EL1. If we get + * an AArch32 fault, it means we managed to trap an EL0 fault. + */ + if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + else + esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + + if (!is_iabt) + esr |= ESR_EL1_EC_DABT_EL0; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; +} + +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + /* + * Build an unknown exception, depending on the instruction + * set. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr; +} + +/** + * kvm_inject_dabt - inject a data abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, false, addr); + + inject_abt64(vcpu, false, addr); +} + +/** + * kvm_inject_pabt - inject a prefetch abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, true, addr); + + inject_abt64(vcpu, true, addr); +} + +/** + * kvm_inject_undefined - inject an undefined instruction into the guest + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_undef32(vcpu); + + inject_undef64(vcpu); +} diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c new file mode 100644 index 000000000000..bbc6ae32e4af --- /dev/null +++ b/arch/arm64/kvm/regmap.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/emulate.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/ptrace.h> + +#define VCPU_NR_MODES 6 +#define REG_OFFSET(_reg) \ + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) + +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { + /* USR Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + REG_OFFSET(pc) + }, + + /* FIQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(compat_r8_fiq), /* r8 */ + REG_OFFSET(compat_r9_fiq), /* r9 */ + REG_OFFSET(compat_r10_fiq), /* r10 */ + REG_OFFSET(compat_r11_fiq), /* r11 */ + REG_OFFSET(compat_r12_fiq), /* r12 */ + REG_OFFSET(compat_sp_fiq), /* r13 */ + REG_OFFSET(compat_lr_fiq), /* r14 */ + REG_OFFSET(pc) + }, + + /* IRQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_irq), /* r13 */ + REG_OFFSET(compat_lr_irq), /* r14 */ + REG_OFFSET(pc) + }, + + /* SVC Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_svc), /* r13 */ + REG_OFFSET(compat_lr_svc), /* r14 */ + REG_OFFSET(pc) + }, + + /* ABT Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_abt), /* r13 */ + REG_OFFSET(compat_lr_abt), /* r14 */ + REG_OFFSET(pc) + }, + + /* UND Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_und), /* r13 */ + REG_OFFSET(compat_lr_und), /* r14 */ + REG_OFFSET(pc) + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + + switch (mode) { + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ + break; + + case COMPAT_PSR_MODE_ABT: + mode = 4; + break; + + case COMPAT_PSR_MODE_UND: + mode = 5; + break; + + case COMPAT_PSR_MODE_SYS: + mode = 0; /* SYS maps to USR */ + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +{ + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_SVC: + mode = KVM_SPSR_SVC; + break; + case COMPAT_PSR_MODE_ABT: + mode = KVM_SPSR_ABT; + break; + case COMPAT_PSR_MODE_UND: + mode = KVM_SPSR_UND; + break; + case COMPAT_PSR_MODE_IRQ: + mode = KVM_SPSR_IRQ; + break; + case COMPAT_PSR_MODE_FIQ: + mode = KVM_SPSR_FIQ; + break; + default: + BUG(); + } + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c new file mode 100644 index 000000000000..70a7816535cd --- /dev/null +++ b/arch/arm64/kvm/reset.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/reset.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> + +#include <kvm/arm_arch_timer.h> + +#include <asm/cputype.h> +#include <asm/ptrace.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_coproc.h> + +/* + * ARMv8 Reset Values + */ +static const struct kvm_regs default_regs_reset = { + .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | + PSR_F_BIT | PSR_D_BIT), +}; + +static const struct kvm_regs default_regs_reset32 = { + .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | + COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + +static bool cpu_has_32bit_el1(void) +{ + u64 pfr0; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + return !!(pfr0 & 0x20); +} + +int kvm_arch_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_ARM_EL1_32BIT: + r = cpu_has_32bit_el1(); + break; + default: + r = 0; + } + + return r; +} + +/** + * kvm_reset_vcpu - sets core registers and sys_regs to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on + * the virtual CPU struct to their architectually defined reset + * values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + const struct kvm_irq_level *cpu_vtimer_irq; + const struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + default: + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (!cpu_has_32bit_el1()) + return -EINVAL; + cpu_reset = &default_regs_reset32; + vcpu->arch.hcr_el2 &= ~HCR_RW; + } else { + cpu_reset = &default_regs_reset; + } + + cpu_vtimer_irq = &default_vtimer_irq; + break; + } + + /* Reset core registers */ + memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + + /* Reset system registers */ + kvm_reset_sys_regs(vcpu); + + /* Reset timer */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + + return 0; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c new file mode 100644 index 000000000000..4cc3b719208e --- /dev/null +++ b/arch/arm64/kvm/sys_regs.c @@ -0,0 +1,1528 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/coproc.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell <rusty@rustcorp.com.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/debug-monitors.h> +#include <trace/events/kvm.h> + +#include "sys_regs.h" + +/* + * All of this file is extremly similar to the ARM coproc.c, but the + * types are different. My gut feeling is that it should be pretty + * easy to merge, but that would be an ABI breakage -- again. VFP + * would also need to be abstracted. + * + * For AArch32, we only take care of what is being trapped. Anything + * that has to do with init and userspace access has to go via the + * 64bit interface. + */ + +/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ +static u32 cache_levels; + +/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ +#define CSSELR_MAX 12 + +/* Which cache CCSIDR represents depends on CSSELR value. */ +static u32 get_ccsidr(u32 csselr) +{ + u32 ccsidr; + + /* Make sure noone else changes CSSELR during this! */ + local_irq_disable(); + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + local_irq_enable(); + + return ccsidr; +} + +static void do_dc_cisw(u32 val) +{ + asm volatile("dc cisw, %x0" : : "r" (val)); + dsb(ish); +} + +static void do_dc_csw(u32 val) +{ + asm volatile("dc csw, %x0" : : "r" (val)); + dsb(ish); +} + +/* See note at ARM ARM B1.14.4 */ +static bool access_dcsw(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + int cpu; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + cpu = get_cpu(); + + cpumask_setall(&vcpu->arch.require_dcache_flush); + cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); + + /* If we were already preempted, take the long way around */ + if (cpu != vcpu->arch.last_pcpu) { + flush_cache_all(); + goto done; + } + + val = *vcpu_reg(vcpu, p->Rt); + + switch (p->CRm) { + case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ + case 14: /* DCCISW */ + do_dc_cisw(val); + break; + + case 10: /* DCCSW */ + do_dc_csw(val); + break; + } + +done: + put_cpu(); + + return true; +} + +/* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + + BUG_ON(!p->is_write); + + val = *vcpu_reg(vcpu, p->Rt); + if (!p->is_aarch32) { + vcpu_sys_reg(vcpu, r->reg) = val; + } else { + if (!p->is_32bit) + vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; + vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + } + + return true; +} + +/* + * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + */ +static bool access_sctlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ + vcpu->arch.hcr_el2 &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } + + return true; +} + +static bool trap_raz_wi(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + else + return read_zero(vcpu, p); +} + +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + *vcpu_reg(vcpu, p->Rt) = (1 << 3); + return true; + } +} + +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u32 val; + asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); + *vcpu_reg(vcpu, p->Rt) = val; + return true; + } +} + +/* + * We want to avoid world-switching all the DBG registers all the + * time: + * + * - If we've touched any debug register, it is likely that we're + * going to touch more of them. It then makes sense to disable the + * traps and start doing the save/restore dance + * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is + * then mandatory to save/restore the registers, as the guest + * depends on them. + * + * For this, we use a DIRTY bit, indicating the guest has modified the + * debug registers, used as follow: + * + * On guest entry: + * - If the dirty bit is set (because we're coming back from trapping), + * disable the traps, save host registers, restore guest registers. + * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), + * set the dirty bit, disable the traps, save host registers, + * restore guest registers. + * - Otherwise, enable the traps + * + * On guest exit: + * - If the dirty bit is set, save guest registers, restore host + * registers and clear the dirty bit. This ensure that the host can + * now use the debug registers. + */ +static bool trap_debug_regs(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + } + + return true; +} + +static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 amair; + + asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); + vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; +} + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + /* + * Simply map the vcpu_id into the Aff0 field of the MPIDR. + */ + vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); +} + +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ + /* DBGBVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + /* DBGBCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + /* DBGWVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + /* DBGWCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + * + * Debug handling: We do trap most, if not all debug related system + * registers. The implementation is good enough to ensure that a guest + * can use these with minimal performance degradation. The drawback is + * that we don't implement any of the external debug, none of the + * OSlock protocol. This should be revisited if we ever encounter a + * more demanding guest... + */ +static const struct sys_reg_desc sys_reg_descs[] = { + /* DC ISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), + access_dcsw }, + /* DC CSW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), + access_dcsw }, + /* DC CISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), + access_dcsw }, + + DBG_BCR_BVR_WCR_WVR_EL1(0), + DBG_BCR_BVR_WCR_WVR_EL1(1), + /* MDCCINT_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + /* MDSCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + DBG_BCR_BVR_WCR_WVR_EL1(2), + DBG_BCR_BVR_WCR_WVR_EL1(3), + DBG_BCR_BVR_WCR_WVR_EL1(4), + DBG_BCR_BVR_WCR_WVR_EL1(5), + DBG_BCR_BVR_WCR_WVR_EL1(6), + DBG_BCR_BVR_WCR_WVR_EL1(7), + DBG_BCR_BVR_WCR_WVR_EL1(8), + DBG_BCR_BVR_WCR_WVR_EL1(9), + DBG_BCR_BVR_WCR_WVR_EL1(10), + DBG_BCR_BVR_WCR_WVR_EL1(11), + DBG_BCR_BVR_WCR_WVR_EL1(12), + DBG_BCR_BVR_WCR_WVR_EL1(13), + DBG_BCR_BVR_WCR_WVR_EL1(14), + DBG_BCR_BVR_WCR_WVR_EL1(15), + + /* MDRAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + trap_raz_wi }, + /* OSLAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), + trap_raz_wi }, + /* OSLSR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), + trap_oslsr_el1 }, + /* OSDLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), + trap_raz_wi }, + /* DBGPRCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), + trap_raz_wi }, + /* DBGCLAIMSET_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), + trap_raz_wi }, + /* DBGCLAIMCLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), + trap_raz_wi }, + /* DBGAUTHSTATUS_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), + trap_dbgauthstatus_el1 }, + + /* TEECR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEECR32_EL1, 0 }, + /* TEEHBR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEEHBR32_EL1, 0 }, + + /* MDCCSR_EL1 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), + trap_raz_wi }, + /* DBGDTR_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), + trap_raz_wi }, + /* DBGDTR[TR]X_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), + trap_raz_wi }, + + /* DBGVCR32_EL2 */ + { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), + NULL, reset_val, DBGVCR32_EL2, 0 }, + + /* MPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), + NULL, reset_mpidr, MPIDR_EL1 }, + /* SCTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, + /* CPACR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), + NULL, reset_val, CPACR_EL1, 0 }, + /* TTBR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), + access_vm_reg, reset_unknown, TTBR0_EL1 }, + /* TTBR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), + access_vm_reg, reset_unknown, TTBR1_EL1 }, + /* TCR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), + access_vm_reg, reset_val, TCR_EL1, 0 }, + + /* AFSR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), + access_vm_reg, reset_unknown, AFSR0_EL1 }, + /* AFSR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), + access_vm_reg, reset_unknown, AFSR1_EL1 }, + /* ESR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), + access_vm_reg, reset_unknown, ESR_EL1 }, + /* FAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), + access_vm_reg, reset_unknown, FAR_EL1 }, + /* PAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), + NULL, reset_unknown, PAR_EL1 }, + + /* PMINTENSET_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), + trap_raz_wi }, + /* PMINTENCLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), + trap_raz_wi }, + + /* MAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), + access_vm_reg, reset_unknown, MAIR_EL1 }, + /* AMAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), + access_vm_reg, reset_amair_el1, AMAIR_EL1 }, + + /* VBAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), + NULL, reset_val, VBAR_EL1, 0 }, + /* CONTEXTIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), + access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, + /* TPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), + NULL, reset_unknown, TPIDR_EL1 }, + + /* CNTKCTL_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), + NULL, reset_val, CNTKCTL_EL1, 0}, + + /* CSSELR_EL1 */ + { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, CSSELR_EL1 }, + + /* PMCR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), + trap_raz_wi }, + /* PMCNTENSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), + trap_raz_wi }, + /* PMCNTENCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), + trap_raz_wi }, + /* PMOVSCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), + trap_raz_wi }, + /* PMSWINC_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), + trap_raz_wi }, + /* PMSELR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), + trap_raz_wi }, + /* PMCEID0_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), + trap_raz_wi }, + /* PMCEID1_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), + trap_raz_wi }, + /* PMCCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), + trap_raz_wi }, + /* PMXEVTYPER_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), + trap_raz_wi }, + /* PMXEVCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), + trap_raz_wi }, + /* PMUSERENR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), + trap_raz_wi }, + /* PMOVSSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), + trap_raz_wi }, + + /* TPIDR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), + NULL, reset_unknown, TPIDR_EL0 }, + /* TPIDRRO_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), + NULL, reset_unknown, TPIDRRO_EL0 }, + + /* DACR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, DACR32_EL2 }, + /* IFSR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, IFSR32_EL2 }, + /* FPEXC32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), + NULL, reset_val, FPEXC32_EL2, 0x70 }, +}; + +static bool trap_dbgidr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + u32 el3 = !!((pfr >> 12) & 0xf); + + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | + (((dfr >> 12) & 0xf) << 24) | + (((dfr >> 28) & 0xf) << 20) | + (6 << 16) | (el3 << 14) | (el3 << 12)); + return true; + } +} + +static bool trap_debug32(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ + NULL, (cp14_DBGBVR0 + (n) * 2) }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ + NULL, (cp14_DBGBCR0 + (n) * 2) }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ + NULL, (cp14_DBGWVR0 + (n) * 2) }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ + NULL, (cp14_DBGWCR0 + (n) * 2) } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ + NULL, cp14_DBGBXVR0 + n * 2 } + +/* + * Trapped cp14 registers. We generally ignore most of the external + * debug, on the principle that they don't really make sense to a + * guest. Revisit this one day, whould this principle change. + */ +static const struct sys_reg_desc cp14_regs[] = { + /* DBGIDR */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr }, + /* DBGDTRRXext */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi }, + + DBG_BCR_BVR_WCR_WVR(0), + /* DBGDSCRint */ + { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(1), + /* DBGDCCINT */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + /* DBGDSCRext */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(2), + /* DBGDTR[RT]Xint */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, + /* DBGDTR[RT]Xext */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(3), + DBG_BCR_BVR_WCR_WVR(4), + DBG_BCR_BVR_WCR_WVR(5), + /* DBGWFAR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi }, + /* DBGOSECCR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(6), + /* DBGVCR */ + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(7), + DBG_BCR_BVR_WCR_WVR(8), + DBG_BCR_BVR_WCR_WVR(9), + DBG_BCR_BVR_WCR_WVR(10), + DBG_BCR_BVR_WCR_WVR(11), + DBG_BCR_BVR_WCR_WVR(12), + DBG_BCR_BVR_WCR_WVR(13), + DBG_BCR_BVR_WCR_WVR(14), + DBG_BCR_BVR_WCR_WVR(15), + + /* DBGDRAR (32bit) */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi }, + + DBGBXVR(0), + /* DBGOSLAR */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + DBGBXVR(1), + /* DBGOSLSR */ + { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 }, + DBGBXVR(2), + DBGBXVR(3), + /* DBGOSDLR */ + { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi }, + DBGBXVR(4), + /* DBGPRCR */ + { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi }, + DBGBXVR(5), + DBGBXVR(6), + DBGBXVR(7), + DBGBXVR(8), + DBGBXVR(9), + DBGBXVR(10), + DBGBXVR(11), + DBGBXVR(12), + DBGBXVR(13), + DBGBXVR(14), + DBGBXVR(15), + + /* DBGDSAR (32bit) */ + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi }, + + /* DBGDEVID2 */ + { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi }, + /* DBGDEVID1 */ + { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi }, + /* DBGDEVID */ + { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi }, + /* DBGCLAIMSET */ + { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi }, + /* DBGCLAIMCLR */ + { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi }, + /* DBGAUTHSTATUS */ + { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 }, +}; + +/* Trapped cp14 64bit registers */ +static const struct sys_reg_desc cp14_64_regs[] = { + /* DBGDRAR (64bit) */ + { Op1( 0), CRm( 1), .access = trap_raz_wi }, + + /* DBGDSAR (64bit) */ + { Op1( 0), CRm( 2), .access = trap_raz_wi }, +}; + +/* + * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, + * depending on the way they are accessed (as a 32bit or a 64bit + * register). + */ +static const struct sys_reg_desc cp15_regs[] = { + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + + /* + * DC{C,I,CI}SW operations: + */ + { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + + /* PMU */ + { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, + + { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, + { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, + { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, + { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, +}; + +static const struct sys_reg_desc cp15_64_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, +}; + +/* Target specific emulation tables */ +static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table) +{ + target_tables[target] = table; +} + +/* Get specific register table for this target. */ +static const struct sys_reg_desc *get_target_table(unsigned target, + bool mode_is_64, + size_t *num) +{ + struct kvm_sys_reg_target_table *table; + + table = target_tables[target]; + if (mode_is_64) { + *num = table->table64.num; + return table->table64.table; + } else { + *num = table->table32.num; + return table->table32.table; + } +} + +static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, + const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const struct sys_reg_desc *r = &table[i]; + + if (params->Op0 != r->Op0) + continue; + if (params->Op1 != r->Op1) + continue; + if (params->CRn != r->CRn) + continue; + if (params->CRm != r->CRm) + continue; + if (params->Op2 != r->Op2) + continue; + + return r; + } + return NULL; +} + +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +/* + * emulate_cp -- tries to match a sys_reg access in a handling table, and + * call the corresponding trap handler. + * + * @params: pointer to the descriptor of the access + * @table: array of trap descriptors + * @num: size of the trap descriptor array + * + * Return 0 if the access has been handled, and -1 if not. + */ +static int emulate_cp(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params, + const struct sys_reg_desc *table, + size_t num) +{ + const struct sys_reg_desc *r; + + if (!table) + return -1; /* Not handled */ + + r = find_reg(params, table, num); + + if (r) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + } + + /* Handled */ + return 0; + } + + /* Not handled */ + return -1; +} + +static void unhandled_cp_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + int cp; + + switch(hsr_ec) { + case ESR_EL2_EC_CP15_32: + case ESR_EL2_EC_CP15_64: + cp = 15; + break; + case ESR_EL2_EC_CP14_MR: + case ESR_EL2_EC_CP14_64: + cp = 14; + break; + default: + WARN_ON((cp = -1)); + } + + kvm_err("Unsupported guest CP%d access at: %08lx\n", + cp, *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); +} + +/** + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt2 = (hsr >> 10) & 0xf; + + params.is_aarch32 = true; + params.is_32bit = false; + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + + params.Op0 = 0; + params.Op1 = (hsr >> 16) & 0xf; + params.Op2 = 0; + params.CRn = 0; + + /* + * Massive hack here. Store Rt2 in the top 32bits so we only + * have one register to deal with. As we use the same trap + * backends between AArch32 and AArch64, we get away with it. + */ + if (params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val &= 0xffffffff; + val |= *vcpu_reg(vcpu, Rt2) << 32; + *vcpu_reg(vcpu, params.Rt) = val; + } + + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + goto out; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + goto out; + + unhandled_cp_access(vcpu, ¶ms); + +out: + /* Do the opposite hack for the read side */ + if (!params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val >>= 32; + *vcpu_reg(vcpu, Rt2) = val; + } + + return 1; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + params.is_aarch32 = true; + params.is_32bit = true; + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + params.CRn = (hsr >> 10) & 0xf; + params.Op0 = 0; + params.Op1 = (hsr >> 14) & 0x7; + params.Op2 = (hsr >> 17) & 0x7; + + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + return 1; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + return 1; + + unhandled_cp_access(vcpu, ¶ms); + return 1; +} + +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_64(vcpu, + cp15_64_regs, ARRAY_SIZE(cp15_64_regs), + target_specific, num); +} + +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_32(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_64(vcpu, + cp14_64_regs, ARRAY_SIZE(cp14_64_regs), + NULL, 0); +} + +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_32(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + +static int emulate_sys_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, true, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + /* If access function fails, it should complain. */ + } else { + kvm_err("Unsupported guest sys_reg access at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + } + kvm_inject_undefined(vcpu); + return 1; +} + +static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *table, size_t num) +{ + unsigned long i; + + for (i = 0; i < num; i++) + if (table[i].reset) + table[i].reset(vcpu, &table[i]); +} + +/** + * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_hsr(vcpu); + + params.is_aarch32 = false; + params.is_32bit = false; + params.Op0 = (esr >> 20) & 3; + params.Op1 = (esr >> 14) & 0x7; + params.CRn = (esr >> 10) & 0xf; + params.CRm = (esr >> 1) & 0xf; + params.Op2 = (esr >> 17) & 0x7; + params.Rt = (esr >> 5) & 0x1f; + params.is_write = !(esr & 1); + + return emulate_sys_reg(vcpu, ¶ms); +} + +/****************************************************************************** + * Userspace API + *****************************************************************************/ + +static bool index_to_params(u64 id, struct sys_reg_params *params) +{ + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U64: + /* Any unused index bits means it's not valid. */ + if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK + | KVM_REG_ARM_COPROC_MASK + | KVM_REG_ARM64_SYSREG_OP0_MASK + | KVM_REG_ARM64_SYSREG_OP1_MASK + | KVM_REG_ARM64_SYSREG_CRN_MASK + | KVM_REG_ARM64_SYSREG_CRM_MASK + | KVM_REG_ARM64_SYSREG_OP2_MASK)) + return false; + params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK) + >> KVM_REG_ARM64_SYSREG_OP0_SHIFT); + params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK) + >> KVM_REG_ARM64_SYSREG_OP1_SHIFT); + params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK) + >> KVM_REG_ARM64_SYSREG_CRN_SHIFT); + params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK) + >> KVM_REG_ARM64_SYSREG_CRM_SHIFT); + params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK) + >> KVM_REG_ARM64_SYSREG_OP2_SHIFT); + return true; + default: + return false; + } +} + +/* Decode an index value, and find the sys_reg_desc entry. */ +static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, + u64 id) +{ + size_t num; + const struct sys_reg_desc *table, *r; + struct sys_reg_params params; + + /* We only do sys_reg for now. */ + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) + return NULL; + + if (!index_to_params(id, ¶ms)) + return NULL; + + table = get_target_table(vcpu->arch.target, true, &num); + r = find_reg(¶ms, table, num); + if (!r) + r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + /* Not saved in the sys_reg array? */ + if (r && !r->reg) + r = NULL; + + return r; +} + +/* + * These are the invariant sys_reg registers: we let the guest see the + * host versions of these, so they're part of the guest state. + * + * A future CPU may provide a mechanism to present different values to + * the guest, or a future kvm may trap them. + */ + +#define FUNCTION_INVARIANT(reg) \ + static void get_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ + { \ + u64 val; \ + \ + asm volatile("mrs %0, " __stringify(reg) "\n" \ + : "=r" (val)); \ + ((struct sys_reg_desc *)r)->val = val; \ + } + +FUNCTION_INVARIANT(midr_el1) +FUNCTION_INVARIANT(ctr_el0) +FUNCTION_INVARIANT(revidr_el1) +FUNCTION_INVARIANT(id_pfr0_el1) +FUNCTION_INVARIANT(id_pfr1_el1) +FUNCTION_INVARIANT(id_dfr0_el1) +FUNCTION_INVARIANT(id_afr0_el1) +FUNCTION_INVARIANT(id_mmfr0_el1) +FUNCTION_INVARIANT(id_mmfr1_el1) +FUNCTION_INVARIANT(id_mmfr2_el1) +FUNCTION_INVARIANT(id_mmfr3_el1) +FUNCTION_INVARIANT(id_isar0_el1) +FUNCTION_INVARIANT(id_isar1_el1) +FUNCTION_INVARIANT(id_isar2_el1) +FUNCTION_INVARIANT(id_isar3_el1) +FUNCTION_INVARIANT(id_isar4_el1) +FUNCTION_INVARIANT(id_isar5_el1) +FUNCTION_INVARIANT(clidr_el1) +FUNCTION_INVARIANT(aidr_el1) + +/* ->val is filled in by kvm_sys_reg_table_init() */ +static struct sys_reg_desc invariant_sys_regs[] = { + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, get_midr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), + NULL, get_revidr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), + NULL, get_id_pfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), + NULL, get_id_pfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), + NULL, get_id_dfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), + NULL, get_id_afr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), + NULL, get_id_mmfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), + NULL, get_id_mmfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), + NULL, get_id_mmfr2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), + NULL, get_id_mmfr3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + NULL, get_id_isar0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), + NULL, get_id_isar1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + NULL, get_id_isar2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), + NULL, get_id_isar3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), + NULL, get_id_isar4_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), + NULL, get_id_isar5_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_clidr_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111), + NULL, get_aidr_el1 }, + { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_ctr_el0 }, +}; + +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) +{ + if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id) +{ + if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int get_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + return reg_to_user(uaddr, &r->val, id); +} + +static int set_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + int err; + u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* This is what we mean by invariant: you can't change it. */ + if (r->val != val) + return -EINVAL; + + return 0; +} + +static bool is_valid_cache(u32 val) +{ + u32 level, ctype; + + if (val >= CSSELR_MAX) + return false; + + /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ + level = (val >> 1); + ctype = (cache_levels >> (level * 3)) & 7; + + switch (ctype) { + case 0: /* No cache */ + return false; + case 1: /* Instruction cache only */ + return (val & 1); + case 2: /* Data cache only */ + case 4: /* Unified cache */ + return !(val & 1); + case 3: /* Separate instruction and data caches */ + return true; + default: /* Reserved: we can't know instruction or data. */ + return false; + } +} + +static int demux_c15_get(u64 id, void __user *uaddr) +{ + u32 val; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + return put_user(get_ccsidr(val), uval); + default: + return -ENOENT; + } +} + +static int demux_c15_set(u64 id, void __user *uaddr) +{ + u32 val, newval; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + if (get_user(newval, uval)) + return -EFAULT; + + /* This is also invariant: you can't change it. */ + if (newval != get_ccsidr(val)) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_get(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return get_invariant_sys_reg(reg->id, uaddr); + + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); +} + +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_set(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return set_invariant_sys_reg(reg->id, uaddr); + + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); +} + +static unsigned int num_demux_regs(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < CSSELR_MAX; i++) + if (is_valid_cache(i)) + count++; + + return count; +} + +static int write_demux_regids(u64 __user *uindices) +{ + u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + unsigned int i; + + val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; + for (i = 0; i < CSSELR_MAX; i++) { + if (!is_valid_cache(i)) + continue; + if (put_user(val | i, uindices)) + return -EFAULT; + uindices++; + } + return 0; +} + +static u64 sys_reg_to_index(const struct sys_reg_desc *reg) +{ + return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | + KVM_REG_ARM64_SYSREG | + (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) | + (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) | + (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) | + (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) | + (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT)); +} + +static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) +{ + if (!*uind) + return true; + + if (put_user(sys_reg_to_index(reg), *uind)) + return false; + + (*uind)++; + return true; +} + +/* Assumed ordered tables, see kvm_sys_reg_table_init. */ +static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) +{ + const struct sys_reg_desc *i1, *i2, *end1, *end2; + unsigned int total = 0; + size_t num; + + /* We check for duplicates here, to allow arch-specific overrides. */ + i1 = get_target_table(vcpu->arch.target, true, &num); + end1 = i1 + num; + i2 = sys_reg_descs; + end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); + + BUG_ON(i1 == end1 || i2 == end2); + + /* Walk carefully, as both tables may refer to the same register. */ + while (i1 || i2) { + int cmp = cmp_sys_reg(i1, i2); + /* target-specific overrides generic entry. */ + if (cmp <= 0) { + /* Ignore registers we trap but don't save. */ + if (i1->reg) { + if (!copy_reg_to_user(i1, &uind)) + return -EFAULT; + total++; + } + } else { + /* Ignore registers we trap but don't save. */ + if (i2->reg) { + if (!copy_reg_to_user(i2, &uind)) + return -EFAULT; + total++; + } + } + + if (cmp <= 0 && ++i1 == end1) + i1 = NULL; + if (cmp >= 0 && ++i2 == end2) + i2 = NULL; + } + return total; +} + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) +{ + return ARRAY_SIZE(invariant_sys_regs) + + num_demux_regs() + + walk_sys_regs(vcpu, (u64 __user *)NULL); +} + +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + int err; + + /* Then give them all the invariant registers' indices. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { + if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) + return -EFAULT; + uindices++; + } + + err = walk_sys_regs(vcpu, uindices); + if (err < 0) + return err; + uindices += err; + + return write_demux_regids(uindices); +} + +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i < n; i++) { + if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + +void kvm_sys_reg_table_init(void) +{ + unsigned int i; + struct sys_reg_desc clidr; + + /* Make sure tables are unique and in order. */ + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); + + /* We abuse the reset function to overwrite the table itself. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) + invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); + + /* + * CLIDR format is awkward, so clean it up. See ARM B4.1.20: + * + * If software reads the Cache Type fields from Ctype1 + * upwards, once it has seen a value of 0b000, no caches + * exist at further-out levels of the hierarchy. So, for + * example, if Ctype3 is the first Cache Type field with a + * value of 0b000, the values of Ctype4 to Ctype7 must be + * ignored. + */ + get_clidr_el1(NULL, &clidr); /* Ugly... */ + cache_levels = clidr.val; + for (i = 0; i < 7; i++) + if (((cache_levels >> (i*3)) & 7) == 0) + break; + /* Clear all higher bits. */ + cache_levels &= (1 << (i*3))-1; +} + +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) +{ + size_t num; + const struct sys_reg_desc *table; + + /* Catch someone adding a register without putting in reset entry. */ + memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + + /* Generic chip reset first (so target could override). */ + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + table = get_target_table(vcpu->arch.target, true, &num); + reset_sys_reg_descs(vcpu, table, num); + + for (num = 1; num < NR_SYS_REGS; num++) + if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset vcpu_sys_reg(%zi)", num); +} diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h new file mode 100644 index 000000000000..d411e251412c --- /dev/null +++ b/arch/arm64/kvm/sys_regs.h @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/coproc.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ +#define __ARM64_KVM_SYS_REGS_LOCAL_H__ + +struct sys_reg_params { + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + u8 Rt; + bool is_write; + bool is_aarch32; + bool is_32bit; /* Only valid if is_aarch32 is true */ +}; + +struct sys_reg_desc { + /* MRS/MSR instruction which accesses it. */ + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + + /* Trapped access from guest, if non-NULL. */ + bool (*access)(struct kvm_vcpu *, + const struct sys_reg_params *, + const struct sys_reg_desc *); + + /* Initialization for vcpu. */ + void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *); + + /* Index into sys_reg[], or 0 if we don't need to save it. */ + int reg; + + /* Value (usually reset value) */ + u64 val; +}; + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* Look, we even formatted it for you to paste into the table! */ + kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); +} + +static inline bool ignore_write(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + return true; +} + +static inline bool read_zero(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + *vcpu_reg(vcpu, p->Rt) = 0; + return true; +} + +static inline bool write_to_read_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg write to read-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +static inline bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg read to write-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +/* Reset functions */ +static inline void reset_unknown(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; +} + +static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = r->val; +} + +static inline int cmp_sys_reg(const struct sys_reg_desc *i1, + const struct sys_reg_desc *i2) +{ + BUG_ON(i1 == i2); + if (!i1) + return 1; + else if (!i2) + return -1; + if (i1->Op0 != i2->Op0) + return i1->Op0 - i2->Op0; + if (i1->Op1 != i2->Op1) + return i1->Op1 - i2->Op1; + if (i1->CRn != i2->CRn) + return i1->CRn - i2->CRn; + if (i1->CRm != i2->CRm) + return i1->CRm - i2->CRm; + return i1->Op2 - i2->Op2; +} + + +#define Op0(_x) .Op0 = _x +#define Op1(_x) .Op1 = _x +#define CRn(_x) .CRn = _x +#define CRm(_x) .CRm = _x +#define Op2(_x) .Op2 = _x + +#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c new file mode 100644 index 000000000000..475fd2929310 --- /dev/null +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/coproc_a15.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kvm_host.h> +#include <asm/cputype.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <linux/init.h> + +#include "sys_regs.h" + +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + return true; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr; + + asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); + vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; +} + +/* + * Implementation specific sys-reg registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc genericv8_sys_regs[] = { + /* ACTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr, reset_actlr, ACTLR_EL1 }, +}; + +static const struct sys_reg_desc genericv8_cp15_regs[] = { + /* ACTLR */ + { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr }, +}; + +static struct kvm_sys_reg_target_table genericv8_target_table = { + .table64 = { + .table = genericv8_sys_regs, + .num = ARRAY_SIZE(genericv8_sys_regs), + }, + .table32 = { + .table = genericv8_cp15_regs, + .num = ARRAY_SIZE(genericv8_cp15_regs), + }, +}; + +static int __init sys_reg_genericv8_init(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) + BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], + &genericv8_sys_regs[i]) >= 0); + + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, + &genericv8_target_table); + + return 0; +} +late_initcall(sys_reg_genericv8_init); diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S new file mode 100644 index 000000000000..ae211772f991 --- /dev/null +++ b/arch/arm64/kvm/vgic-v2-switch.S @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/irqchip/arm-gic.h> + +#include <asm/assembler.h> +#include <asm/memory.h> +#include <asm/asm-offsets.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.text, "ax" + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +ENTRY(__save_vgic_v2_state) +__save_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) + + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__save_vgic_v2_state) + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +ENTRY(__restore_vgic_v2_state) +__restore_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__restore_vgic_v2_state) + + .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S new file mode 100644 index 000000000000..d16046999e06 --- /dev/null +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -0,0 +1,267 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/assembler.h> +#include <asm/memory.h> +#include <asm/asm-offsets.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> + + .text + .pushsection .hyp.text, "ax" + +/* + * We store LRs in reverse order to let the CPU deal with streaming + * access. Use this macro to make it look saner... + */ +#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_v3_state + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Make sure stores to the GIC via the memory mapped interface + // are now visible to the system register interface + dsb st + + // Save all interesting registers + mrs_s x4, ICH_HCR_EL2 + mrs_s x5, ICH_VMCR_EL2 + mrs_s x6, ICH_MISR_EL2 + mrs_s x7, ICH_EISR_EL2 + mrs_s x8, ICH_ELSR_EL2 + + str w4, [x3, #VGIC_V3_CPU_HCR] + str w5, [x3, #VGIC_V3_CPU_VMCR] + str w6, [x3, #VGIC_V3_CPU_MISR] + str w7, [x3, #VGIC_V3_CPU_EISR] + str w8, [x3, #VGIC_V3_CPU_ELRSR] + + msr_s ICH_HCR_EL2, xzr + + mrs_s x21, ICH_VTR_EL2 + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + mrs_s x20, ICH_LR15_EL2 + mrs_s x19, ICH_LR14_EL2 + mrs_s x18, ICH_LR13_EL2 + mrs_s x17, ICH_LR12_EL2 + mrs_s x16, ICH_LR11_EL2 + mrs_s x15, ICH_LR10_EL2 + mrs_s x14, ICH_LR9_EL2 + mrs_s x13, ICH_LR8_EL2 + mrs_s x12, ICH_LR7_EL2 + mrs_s x11, ICH_LR6_EL2 + mrs_s x10, ICH_LR5_EL2 + mrs_s x9, ICH_LR4_EL2 + mrs_s x8, ICH_LR3_EL2 + mrs_s x7, ICH_LR2_EL2 + mrs_s x6, ICH_LR1_EL2 + mrs_s x5, ICH_LR0_EL2 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + str x20, [x3, #LR_OFFSET(15)] + str x19, [x3, #LR_OFFSET(14)] + str x18, [x3, #LR_OFFSET(13)] + str x17, [x3, #LR_OFFSET(12)] + str x16, [x3, #LR_OFFSET(11)] + str x15, [x3, #LR_OFFSET(10)] + str x14, [x3, #LR_OFFSET(9)] + str x13, [x3, #LR_OFFSET(8)] + str x12, [x3, #LR_OFFSET(7)] + str x11, [x3, #LR_OFFSET(6)] + str x10, [x3, #LR_OFFSET(5)] + str x9, [x3, #LR_OFFSET(4)] + str x8, [x3, #LR_OFFSET(3)] + str x7, [x3, #LR_OFFSET(2)] + str x6, [x3, #LR_OFFSET(1)] + str x5, [x3, #LR_OFFSET(0)] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs_s x20, ICH_AP0R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + mrs_s x19, ICH_AP0R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] +6: mrs_s x18, ICH_AP0R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] +5: mrs_s x17, ICH_AP0R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP0R] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs_s x20, ICH_AP1R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + mrs_s x19, ICH_AP1R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] +6: mrs_s x18, ICH_AP1R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] +5: mrs_s x17, ICH_AP1R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP1R] + + // Restore SRE_EL1 access and re-enable SRE at EL1. + mrs_s x5, ICC_SRE_EL2 + orr x5, x5, #ICC_SRE_EL2_ENABLE + msr_s ICC_SRE_EL2, x5 + isb + mov x5, #1 + msr_s ICC_SRE_EL1, x5 +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_v3_state + // Disable SRE_EL1 access. Necessary, otherwise + // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... + msr_s ICC_SRE_EL1, xzr + isb + + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Restore all interesting registers + ldr w4, [x3, #VGIC_V3_CPU_HCR] + ldr w5, [x3, #VGIC_V3_CPU_VMCR] + + msr_s ICH_HCR_EL2, x4 + msr_s ICH_VMCR_EL2, x5 + + mrs_s x21, ICH_VTR_EL2 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + msr_s ICH_AP1R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] + msr_s ICH_AP1R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] + msr_s ICH_AP1R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] + msr_s ICH_AP1R0_EL2, x17 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + msr_s ICH_AP0R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] + msr_s ICH_AP0R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] + msr_s ICH_AP0R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] + msr_s ICH_AP0R0_EL2, x17 + + and w22, w21, #0xf + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + ldr x20, [x3, #LR_OFFSET(15)] + ldr x19, [x3, #LR_OFFSET(14)] + ldr x18, [x3, #LR_OFFSET(13)] + ldr x17, [x3, #LR_OFFSET(12)] + ldr x16, [x3, #LR_OFFSET(11)] + ldr x15, [x3, #LR_OFFSET(10)] + ldr x14, [x3, #LR_OFFSET(9)] + ldr x13, [x3, #LR_OFFSET(8)] + ldr x12, [x3, #LR_OFFSET(7)] + ldr x11, [x3, #LR_OFFSET(6)] + ldr x10, [x3, #LR_OFFSET(5)] + ldr x9, [x3, #LR_OFFSET(4)] + ldr x8, [x3, #LR_OFFSET(3)] + ldr x7, [x3, #LR_OFFSET(2)] + ldr x6, [x3, #LR_OFFSET(1)] + ldr x5, [x3, #LR_OFFSET(0)] + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + msr_s ICH_LR15_EL2, x20 + msr_s ICH_LR14_EL2, x19 + msr_s ICH_LR13_EL2, x18 + msr_s ICH_LR12_EL2, x17 + msr_s ICH_LR11_EL2, x16 + msr_s ICH_LR10_EL2, x15 + msr_s ICH_LR9_EL2, x14 + msr_s ICH_LR8_EL2, x13 + msr_s ICH_LR7_EL2, x12 + msr_s ICH_LR6_EL2, x11 + msr_s ICH_LR5_EL2, x10 + msr_s ICH_LR4_EL2, x9 + msr_s ICH_LR3_EL2, x8 + msr_s ICH_LR2_EL2, x7 + msr_s ICH_LR1_EL2, x6 + msr_s ICH_LR0_EL2, x5 + + // Ensure that the above will have reached the + // (re)distributors. This ensure the guest will read + // the correct values from the memory-mapped interface. + isb + dsb sy + + // Prevent the guest from touching the GIC system registers + mrs_s x5, ICC_SRE_EL2 + and x5, x5, #~ICC_SRE_EL2_ENABLE + msr_s ICC_SRE_EL2, x5 +.endm + +ENTRY(__save_vgic_v3_state) + save_vgic_v3_state + ret +ENDPROC(__save_vgic_v3_state) + +ENTRY(__restore_vgic_v3_state) + restore_vgic_v3_state + ret +ENDPROC(__restore_vgic_v3_state) + +ENTRY(__vgic_v3_get_ich_vtr_el2) + mrs_s x0, ICH_VTR_EL2 + ret +ENDPROC(__vgic_v3_get_ich_vtr_el2) + + .popsection diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index e0ef63cd05dc..e085ee6ef4e2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -209,8 +209,14 @@ ENTRY(__cpu_setup) * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \ TCR_ASID16 | TCR_TBI0 | (1 << 31) + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in + * TCR_EL1. + */ + mrs x9, ID_AA64MMFR0_EL1 + bfi x10, x9, #32, #3 #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 989dd3fe8de1..cf03097176b1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -238,9 +238,6 @@ struct kvm_vm_data { #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) 1 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ @@ -599,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) {} +static inline void kvm_arch_hardware_unsetup(void) {} + #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b86420cc6..3d50ea955c4c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 1a4053789d01..18e45ec49bbf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file) ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ +KVM := ../../../virt/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) +common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) +common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5b2dc0d10c8f..c9aa236dc29b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -702,7 +702,7 @@ again: out: srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { - kvm_resched(vcpu); + cond_resched(); idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } @@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); @@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_release_vm_pages(kvm); } -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { if (cpu != vcpu->cpu) { @@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kfree(vcpu->arch.apic); } - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1550,12 +1541,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } @@ -1591,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ - return; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); @@ -1847,10 +1826,6 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4d6fa0bf1305..5e3f4b0f18c8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -71,11 +71,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); @@ -659,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index 2c7b3ade8ec0..5cd48572450e 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -196,16 +196,21 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b973e0af9cb..d340d53c345b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -74,6 +74,7 @@ */ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af326cde7cb6..f391f3fbde8b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; @@ -628,4 +623,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe03d77..e2dd05c81bc6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..60019a6fd6bb 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -155,6 +155,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46c..008cd856c5b5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -5,9 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm +KVM := ../../../virt/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ - eventfd.o) +common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ + $(KVM)/eventfd.o CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. @@ -53,7 +54,7 @@ kvm-e500mc-objs := \ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - ../../../virt/kvm/coalesced_mmio.o \ + $(KVM)/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-module-objs := \ - ../../../virt/kvm/kvm_main.o \ - ../../../virt/kvm/eventfd.o \ + $(KVM)/kvm_main.o \ + $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ @@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o -kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 102ad8a255f3..a54c59db7bd8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1258,7 +1258,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - kvm_resched(vcpu); + cond_resched(); spin_lock(&vc->lock); now = get_tb(); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1a1b51189773..0a91f47e264b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 2861ae9eaae6..b58d61039015 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6316ee336e88..ea4cfdc991da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -246,24 +246,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = kvmppc_core_check_processor_compat(); @@ -296,11 +288,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -409,15 +397,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -436,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -1125,7 +1110,3 @@ int kvm_arch_init(void *opaque) { return 0; } - -void kvm_arch_exit(void) -{ -} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..99971dfc6b9a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include <linux/types.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> +#include <linux/kvm_types.h> #include <linux/kvm_host.h> #include <asm/debug.h> #include <asm/cpu.h> @@ -266,4 +269,18 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); + +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_check_processor_compat(void *rtn) {} +static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} + #endif diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 698fb826e149..412fbc5dc688 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,16 +86,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; @@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void) { } -void kvm_arch_check_processor_compat(void *rtn) -{ -} - int kvm_arch_init(void *opaque) { return 0; } -void kvm_arch_exit(void) -{ -} - /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -127,7 +115,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); @@ -320,11 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - /* Nothing todo */ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_regs(&vcpu->arch.host_fpregs); @@ -971,12 +950,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } @@ -1026,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - static int __init kvm_s390_init(void) { int ret; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index eba15f18fd38..a4dfc0bd05db 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f7f20f7fac3c..1b83952b1106 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,6 +79,13 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 @@ -96,10 +103,6 @@ #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, @@ -631,8 +634,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..bdccfb62aa0d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE @@ -38,6 +39,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d609e1d84048..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -5,12 +5,13 @@ CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. -kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o \ - irqchip.o) -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ - assigned-dev.o iommu.o) -kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) +KVM := ../../../virt/kvm + +kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o +kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o cpuid.o pmu.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..89d288237b9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit) #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -480,6 +486,15 @@ out: return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (entries[i].padding[0] || + entries[i].padding[1] || + entries[i].padding[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) @@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 711c649f80b7..6d9635d02f85 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3241,7 +3241,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu.direct_map; arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, gfn, &arch); + return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); } static bool can_do_async_pf(struct kvm_vcpu *vcpu) @@ -4229,7 +4229,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) if (nr_to_scan == 0) goto out; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4265,7 +4265,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); out: return percpu_counter_read_positive(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7e6090e13237..a6a6370c2010 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -69,6 +69,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -204,7 +221,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 765210d4d925..e00860ec66b5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -606,7 +606,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cdafb6dc705..d6c6d1c2e5ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e8753555f144..d32ded4703ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -242,7 +242,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -2520,7 +2520,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -2530,6 +2530,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2639,15 +2640,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; @@ -5123,7 +5126,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5133,7 +5136,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5280,12 +5283,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); @@ -5928,7 +5931,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_resched(vcpu); + cond_resched(); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } @@ -6582,7 +6585,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6593,7 +6596,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -6673,10 +6676,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) @@ -6788,6 +6791,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) @@ -6879,7 +6886,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -6900,7 +6907,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; @@ -6958,6 +6966,10 @@ out_free: return -ENOMEM; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -7096,7 +7108,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7206,7 +7218,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); |