aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2014-09-27 11:03:33 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2014-09-27 11:03:33 +0200
commite77d99d4a4ec761ad061f1ec890c71040a92efe3 (patch)
treeaea6fe2ee5bb6e699045a3629b48208f3e2a26b6
parentbb0ca6acd466af55c95b7ce508f29e23a24cabd9 (diff)
parent0496daa5cf99741ce8db82686b4c7446a37feabb (diff)
Merge tag 'kvm-arm-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-next
Changes for KVM for arm/arm64 for 3.18 This includes a bunch of changes: - Support read-only memory slots on arm/arm64 - Various changes to fix Sparse warnings - Correctly detect write vs. read Stage-2 faults - Various VGIC cleanups and fixes - Dynamic VGIC data strcuture sizing - Fix SGI set_clear_pend offset bug - Fix VTTBR_BADDR Mask - Correctly report the FSC on Stage-2 faults Conflicts: virt/kvm/eventfd.c [duplicate, different patch where the kvm-arm version broke x86. The kvm tree instead has the right one]
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt10
-rw-r--r--arch/arm/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm/include/asm/kvm_host.h2
-rw-r--r--arch/arm/include/asm/kvm_mmu.h11
-rw-r--r--arch/arm/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm/kvm/arm.c17
-rw-r--r--arch/arm/kvm/coproc.c2
-rw-r--r--arch/arm/kvm/guest.c2
-rw-r--r--arch/arm/kvm/mmu.c40
-rw-r--r--arch/arm64/include/asm/kvm_arm.h13
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm64/include/asm/kvm_host.h4
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h18
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm64/kvm/guest.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c2
-rw-r--r--include/kvm/arm_vgic.h112
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--virt/kvm/arm/vgic.c631
-rw-r--r--virt/kvm/kvm_main.c11
20 files changed, 678 insertions, 215 deletions
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 7f4e91b1316b..df8b0c7540b6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -71,3 +71,13 @@ Groups:
Errors:
-ENODEV: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+ Attributes:
+ A value describing the number of interrupts (SGI, PPI and SPI) for
+ this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+ Errors:
+ -EINVAL: Value set is out of the expected range
+ -EBUSY: Value has already be set, or GIC has already been initialized
+ with default values.
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 69b746955fca..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
}
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 155497c2b4da..53036e21756b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -43,7 +43,7 @@
#include <kvm/arm_vgic.h>
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f5f72f..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
flush_pmd_entry(pte);
}
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
- unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
- if (hsr_ec == HSR_EC_IABT)
- return false;
- else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
- return false;
- else
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd)
{
clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3471e5..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -173,6 +174,7 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 005a7b5fd0aa..779605122f32 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
/**
* kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_arm_running_vcpu;
}
@@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+ kvm_vgic_destroy(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -177,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_READONLY_MEM:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -242,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -257,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
- /* Set up VGIC */
- ret = kvm_vgic_vcpu_init(vcpu);
- if (ret)
- return ret;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -413,9 +410,9 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
+ BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
- kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
- kvm->arch.vttbr |= vmid;
+ kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 37a0fe1bb9bb..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 813e49258690..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
if (ret != 0)
- return ret;
+ return -EFAULT;
return kvm_arm_timer_set_reg(vcpu, reg->id, val);
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -746,22 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
return false;
}
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
int ret;
bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
- unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
@@ -863,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -874,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -910,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
- ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520459ed..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -122,6 +122,17 @@
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
@@ -149,7 +160,7 @@
#endif
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fdc3e21abd8d..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 992d9da88119..2012c4ba8d67 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -43,7 +43,7 @@
#define KVM_VCPU_MAX_FEATURES 3
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -197,7 +197,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
}
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
u64 kvm_call_hyp(void *hypfn, ...);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
/*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
*/
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
@@ -93,19 +92,6 @@ void kvm_clear_hyp_idmap(void);
#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
- unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
- if (esr_ec == ESR_EL2_EC_IABT)
- return false;
-
- if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
- return false;
-
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd) {}
static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8cdec8..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,6 +37,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -159,6 +160,7 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8d1ec2887a26..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
if (ret != 0)
- return ret;
+ return -EFAULT;
return kvm_arm_timer_set_reg(vcpu, reg->id, val);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5805e7c4a4dd..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb65..2f2aac8448a4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,26 +25,25 @@
#include <linux/spinlock.h>
#include <linux/types.h>
-#define VGIC_NR_IRQS 256
+#define VGIC_NR_IRQS_LEGACY 256
#define VGIC_NR_SGIS 16
#define VGIC_NR_PPIS 16
#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS KVM_MAX_VCPUS
#define VGIC_V2_MAX_LRS (1 << 6)
#define VGIC_V3_MAX_LRS 16
+#define VGIC_MAX_IRQS 1024
/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
#error Invalid number of CPU interfaces
#endif
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
#error "VGIC_NR_IRQS must be a multiple of 32"
#endif
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
#error "VGIC_NR_IRQS must be <= 1024"
#endif
@@ -54,19 +53,33 @@
* - a bunch of shared interrupts (SPI)
*/
struct vgic_bitmap {
- union {
- u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
- } percpu[VGIC_MAX_CPUS];
- union {
- u32 reg[VGIC_NR_SHARED_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
- } shared;
+ /*
+ * - One UL per VCPU for private interrupts (assumes UL is at
+ * least 32 bits)
+ * - As many UL as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, one UL per vcpu (the state for vcpu n is in
+ * private[n]). The shared interrupts are accessed via the
+ * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+ */
+ unsigned long *private;
+ unsigned long *shared;
};
struct vgic_bytemap {
- u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
- u32 shared[VGIC_NR_SHARED_IRQS / 4];
+ /*
+ * - 8 u32 per VCPU for private interrupts
+ * - As many u32 as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, (the state for vcpu n is in private[n*8] to
+ * private[n*8 + 7]). The shared interrupts are accessed via
+ * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+ * shared[(n-32)/4] word).
+ */
+ u32 *private;
+ u32 *shared;
};
struct kvm_vcpu;
@@ -127,6 +140,9 @@ struct vgic_dist {
bool in_kernel;
bool ready;
+ int nr_cpus;
+ int nr_irqs;
+
/* Virtual control interface mapping */
void __iomem *vctrl_base;
@@ -140,11 +156,25 @@ struct vgic_dist {
/* Interrupt enabled (one bit per IRQ) */
struct vgic_bitmap irq_enabled;
- /* Interrupt 'pin' level */
- struct vgic_bitmap irq_state;
+ /* Level-triggered interrupt external input is asserted */
+ struct vgic_bitmap irq_level;
- /* Level-triggered interrupt in progress */
- struct vgic_bitmap irq_active;
+ /*
+ * Interrupt state is pending on the distributor
+ */
+ struct vgic_bitmap irq_pending;
+
+ /*
+ * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+ * interrupts. Essentially holds the state of the flip-flop in
+ * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+ * Once set, it is only cleared for level-triggered interrupts on
+ * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+ */
+ struct vgic_bitmap irq_soft_pend;
+
+ /* Level-triggered interrupt queued on VCPU interface */
+ struct vgic_bitmap irq_queued;
/* Interrupt priority. Not used yet. */
struct vgic_bytemap irq_priority;
@@ -152,15 +182,36 @@ struct vgic_dist {
/* Level/edge triggered */
struct vgic_bitmap irq_cfg;
- /* Source CPU per SGI and target CPU */
- u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
- /* Target CPU for each IRQ */
- u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS];
- struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS];
+ /*
+ * Source CPU per SGI and target CPU:
+ *
+ * Each byte represent a SGI observable on a VCPU, each bit of
+ * this byte indicating if the corresponding VCPU has
+ * generated this interrupt. This is a GICv2 feature only.
+ *
+ * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+ * the SGIs observable on VCPUn.
+ */
+ u8 *irq_sgi_sources;
+
+ /*
+ * Target CPU for each SPI:
+ *
+ * Array of available SPI, each byte indicating the target
+ * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
+ */
+ u8 *irq_spi_cpu;
+
+ /*
+ * Reverse lookup of irq_spi_cpu for faster compute pending:
+ *
+ * Array of bitmaps, one per VCPU, describing if IRQn is
+ * routed to a particular VCPU.
+ */
+ struct vgic_bitmap *irq_spi_target;
/* Bitmap indicating which CPU has something pending */
- unsigned long irq_pending_on_cpu;
+ unsigned long *irq_pending_on_cpu;
#endif
};
@@ -190,11 +241,11 @@ struct vgic_v3_cpu_if {
struct vgic_cpu {
#ifdef CONFIG_KVM_ARM_VGIC
/* per IRQ to LR mapping */
- u8 vgic_irq_lr_map[VGIC_NR_IRQS];
+ u8 *vgic_irq_lr_map;
/* Pending interrupts on this VCPU */
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
- DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
+ unsigned long *pending_shared;
/* Bitmap of used/free list registers */
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -225,7 +276,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
int kvm_vgic_hyp_init(void);
int kvm_vgic_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d594f9f34429..28be31f49250 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -536,6 +536,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+ bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 3ee3ce06bbec..862967852d5a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
* How the whole thing works (courtesy of Christoffer Dall):
*
* - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- * something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- * bitmap (this bitmap is updated by both user land ioctls and guest
- * mmio ops, and other in-kernel peripherals such as the
- * arch. timers) and indicate the 'wire' state.
+ * something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ * ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ * arch. timers).
* - Every time the bitmap changes, the irq_pending_on_cpu oracle is
* recalculated
* - To calculate the oracle, we need info for each cpu from
* compute_pending_for_cpu, which considers:
- * - PPI: dist->irq_state & dist->irq_enable
- * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- * - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ * - PPI: dist->irq_pending & dist->irq_enable
+ * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
+ * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
* registers, stored on each vcpu. We only keep one bit of
* information per interrupt, making sure that only one vcpu can
* accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
* - The same is true when injecting an interrupt, except that we only
* consider a single interrupt at a time. The irq_spi_cpu array
* contains the target CPU for each SPI.
@@ -60,13 +61,18 @@
* the 'line' again. This is achieved as such:
*
* - When a level interrupt is moved onto a vcpu, the corresponding
- * bit in irq_active is set. As long as this bit is set, the line
+ * bit in irq_queued is set. As long as this bit is set, the line
* will be ignored for further interrupts. The interrupt is injected
* into the vcpu with the GICH_LR_EOI bit set (generate a
* maintenance interrupt on EOI).
* - When the interrupt is EOIed, the maintenance interrupt fires,
- * and clears the corresponding bit in irq_active. This allow the
+ * and clears the corresponding bit in irq_queued. This allows the
* interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ * writes to GICD_ISPENDRn and lowering the external input line does not
+ * cause the interrupt to become inactive in such a situation.
+ * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ * inactive as long as the external input line is held high.
*/
#define VGIC_ADDR_UNDEF (-1)
@@ -89,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
static void vgic_update_state(struct kvm *kvm);
static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -99,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
static const struct vgic_params *vgic;
/*
- * struct vgic_bitmap contains unions that provide two views of
- * the same data. In one case it is an array of registers of
- * u32's, and in the other case it is a bitmap of unsigned
- * longs.
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
*
* This does not work on 64-bit BE systems, because the bitmap access
* will store two consecutive 32-bit words with the higher-addressed
@@ -118,23 +123,45 @@ static const struct vgic_params *vgic;
#define REG_OFFSET_SWIZZLE 0
#endif
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+ int nr_longs;
+
+ nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+ b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+ if (!b->private)
+ return -ENOMEM;
+
+ b->shared = b->private + nr_cpus;
+
+ return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
+
static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
int cpuid, u32 offset)
{
offset >>= 2;
if (!offset)
- return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
+ return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
else
- return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+ return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
}
static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
int cpuid, int irq)
{
if (irq < VGIC_NR_PRIVATE_IRQS)
- return test_bit(irq, x->percpu[cpuid].reg_ul);
+ return test_bit(irq, x->private + cpuid);
- return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
+ return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
}
static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -143,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
unsigned long *reg;
if (irq < VGIC_NR_PRIVATE_IRQS) {
- reg = x->percpu[cpuid].reg_ul;
+ reg = x->private + cpuid;
} else {
- reg = x->shared.reg_ul;
+ reg = x->shared;
irq -= VGIC_NR_PRIVATE_IRQS;
}
@@ -157,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
{
- if (unlikely(cpuid >= VGIC_MAX_CPUS))
- return NULL;
- return x->percpu[cpuid].reg_ul;
+ return x->private + cpuid;
}
static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
{
- return x->shared.reg_ul;
+ return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+ int size;
+
+ size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+ size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+ x->private = kzalloc(size, GFP_KERNEL);
+ if (!x->private)
+ return -ENOMEM;
+
+ x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+ return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
}
static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
{
- offset >>= 2;
- BUG_ON(offset > (VGIC_NR_IRQS / 4));
- if (offset < 8)
- return x->percpu[cpuid] + offset;
- else
- return x->shared + offset - 8;
+ u32 *reg;
+
+ if (offset < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private;
+ offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+ } else {
+ reg = x->shared;
+ offset -= VGIC_NR_PRIVATE_IRQS;
+ }
+
+ return reg + (offset / sizeof(u32));
}
#define VGIC_CFG_LEVEL 0
@@ -196,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
}
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
+}
+
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
}
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+ return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
}
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+ vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
}
static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+ return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
}
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
}
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
}
static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +343,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
vcpu->arch.vgic_cpu.pending_shared);
}
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
{
return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -347,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
case 4: /* GICD_TYPER */
reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
- reg |= (VGIC_NR_IRQS >> 5) - 1;
+ reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
vgic_reg_access(mmio, &reg, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
break;
@@ -409,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *reg, orig;
+ u32 level_mask;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+ level_mask = (~(*reg));
+
+ /* Mark both level and edge triggered irqs as pending */
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
if (mmio->is_write) {
+ /* Set the soft-pending flag only for level-triggered irqs */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ *reg &= level_mask;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
vgic_update_state(vcpu->kvm);
return true;
}
@@ -425,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *level_active;
+ u32 *reg, orig;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
if (mmio->is_write) {
+ /* Re-set level triggered level-active interrupts */
+ level_active = vgic_bitmap_get_reg(&dist->irq_level,
+ vcpu->vcpu_id, offset);
+ reg = vgic_bitmap_get_reg(&dist->irq_pending,
+ vcpu->vcpu_id, offset);
+ *reg |= *level_active;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ /* Clear soft-pending flags */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
vgic_update_state(vcpu->kvm);
return true;
}
@@ -651,9 +788,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* is fine, then we are only setting a few bits that were
* already set.
*/
- vgic_dist_irq_set(vcpu, lr.irq);
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
if (lr.irq < VGIC_NR_SGIS)
- dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+ *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
lr.state &= ~LR_STATE_PENDING;
vgic_set_lr(vcpu, i, lr);
@@ -662,8 +799,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* active), then the LR does not hold any useful info and can
* be marked as free for other use.
*/
- if (!(lr.state & LR_STATE_MASK))
+ if (!(lr.state & LR_STATE_MASK)) {
vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
+ }
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -677,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int sgi;
- int min_sgi = (offset & ~0x3) * 4;
+ int min_sgi = (offset & ~0x3);
int max_sgi = min_sgi + 3;
int vcpu_id = vcpu->vcpu_id;
u32 reg = 0;
@@ -685,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
/* Copy source SGIs from distributor side */
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
int shift = 8 * (sgi - min_sgi);
- reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+ reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
}
mmio_data_write(mmio, ~0, reg);
@@ -698,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int sgi;
- int min_sgi = (offset & ~0x3) * 4;
+ int min_sgi = (offset & ~0x3);
int max_sgi = min_sgi + 3;
int vcpu_id = vcpu->vcpu_id;
u32 reg;
@@ -709,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
/* Clear pending SGIs on the distributor */
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
u8 mask = reg >> (8 * (sgi - min_sgi));
+ u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
if (set) {
- if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+ if ((*src & mask) != mask)
updated = true;
- dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+ *src |= mask;
} else {
- if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+ if (*src & mask)
updated = true;
- dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+ *src &= ~mask;
}
}
@@ -755,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
struct mmio_range {
phys_addr_t base;
unsigned long len;
+ int bits_per_irq;
bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
phys_addr_t offset);
};
@@ -763,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
{
.base = GIC_DIST_CTRL,
.len = 12,
+ .bits_per_irq = 0,
.handle_mmio = handle_mmio_misc,
},
{
.base = GIC_DIST_IGROUP,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_ENABLE_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_set_enable_reg,
},
{
.base = GIC_DIST_ENABLE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_enable_reg,
},
{
.base = GIC_DIST_PENDING_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_set_pending_reg,
},
{
.base = GIC_DIST_PENDING_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_pending_reg,
},
{
.base = GIC_DIST_ACTIVE_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_ACTIVE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_PRI,
- .len = VGIC_NR_IRQS,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
.handle_mmio = handle_mmio_priority_reg,
},
{
.base = GIC_DIST_TARGET,
- .len = VGIC_NR_IRQS,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
.handle_mmio = handle_mmio_target_reg,
},
{
.base = GIC_DIST_CONFIG,
- .len = VGIC_NR_IRQS / 4,
+ .len = VGIC_MAX_IRQS / 4,
+ .bits_per_irq = 2,
.handle_mmio = handle_mmio_cfg_reg,
},
{
@@ -850,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
return NULL;
}
+static bool vgic_validate_access(const struct vgic_dist *dist,
+ const struct mmio_range *range,
+ unsigned long offset)
+{
+ int irq;
+
+ if (!range->bits_per_irq)
+ return true; /* Not an irq-based access */
+
+ irq = offset * 8 / range->bits_per_irq;
+ if (irq >= dist->nr_irqs)
+ return false;
+
+ return true;
+}
+
/**
* vgic_handle_mmio - handle an in-kernel MMIO access
* @vcpu: pointer to the vcpu performing the access
@@ -889,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
spin_lock(&vcpu->kvm->arch.vgic.lock);
offset = mmio->phys_addr - range->base - base;
- updated_state = range->handle_mmio(vcpu, mmio, offset);
+ if (vgic_validate_access(dist, range, offset)) {
+ updated_state = range->handle_mmio(vcpu, mmio, offset);
+ } else {
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ updated_state = false;
+ }
spin_unlock(&vcpu->kvm->arch.vgic.lock);
kvm_prepare_mmio(run, mmio);
kvm_handle_mmio_return(vcpu, run);
@@ -900,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
return true;
}
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+ return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
{
struct kvm *kvm = vcpu->kvm;
@@ -932,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
kvm_for_each_vcpu(c, vcpu, kvm) {
if (target_cpus & 1) {
/* Flag the SGI as pending */
- vgic_dist_irq_set(vcpu, sgi);
- dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
+ vgic_dist_irq_set_pending(vcpu, sgi);
+ *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
}
@@ -941,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
}
}
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+ return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
unsigned long pending_private, pending_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
int vcpu_id;
vcpu_id = vcpu->vcpu_id;
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
- pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+ pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
- pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+ pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
- bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+ bitmap_and(pend_shared, pending, enabled, nr_shared);
bitmap_and(pend_shared, pend_shared,
vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
- VGIC_NR_SHARED_IRQS);
+ nr_shared);
pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
- pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+ pending_shared = find_first_bit(pend_shared, nr_shared);
return (pending_private < VGIC_NR_PRIVATE_IRQS ||
- pending_shared < VGIC_NR_SHARED_IRQS);
+ pending_shared < vgic_nr_shared_irqs(dist));
}
/*
@@ -980,14 +1165,14 @@ static void vgic_update_state(struct kvm *kvm)
int c;
if (!dist->enabled) {
- set_bit(0, &dist->irq_pending_on_cpu);
+ set_bit(0, dist->irq_pending_on_cpu);
return;
}
kvm_for_each_vcpu(c, vcpu, kvm) {
if (compute_pending_for_cpu(vcpu)) {
pr_debug("CPU%d has pending interrupts\n", c);
- set_bit(c, &dist->irq_pending_on_cpu);
+ set_bit(c, dist->irq_pending_on_cpu);
}
}
}
@@ -1079,8 +1264,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
vgic_retire_lr(lr, vlr.irq, vcpu);
- if (vgic_irq_is_active(vcpu, vlr.irq))
- vgic_irq_clear_active(vcpu, vlr.irq);
+ if (vgic_irq_is_queued(vcpu, vlr.irq))
+ vgic_irq_clear_queued(vcpu, vlr.irq);
}
}
}
@@ -1092,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_lr vlr;
int lr;
/* Sanitize the input... */
BUG_ON(sgi_source_id & ~7);
BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
- BUG_ON(irq >= VGIC_NR_IRQS);
+ BUG_ON(irq >= dist->nr_irqs);
kvm_debug("Queue IRQ%d\n", irq);
@@ -1144,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
int vcpu_id = vcpu->vcpu_id;
int c;
- sources = dist->irq_sgi_sources[vcpu_id][irq];
+ sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
- for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+ for_each_set_bit(c, &sources, dist->nr_cpus) {
if (vgic_queue_irq(vcpu, c, irq))
clear_bit(c, &sources);
}
- dist->irq_sgi_sources[vcpu_id][irq] = sources;
+ *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
/*
* If the sources bitmap has been cleared it means that we
@@ -1160,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
* our emulated gic and can get rid of them.
*/
if (!sources) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
return true;
}
@@ -1170,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
{
- if (vgic_irq_is_active(vcpu, irq))
+ if (!vgic_can_sample_irq(vcpu, irq))
return true; /* level interrupt, already queued */
if (vgic_queue_irq(vcpu, 0, irq)) {
if (vgic_irq_is_edge(vcpu, irq)) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
} else {
- vgic_irq_set_active(vcpu, irq);
+ vgic_irq_set_queued(vcpu, irq);
}
return true;
@@ -1223,7 +1409,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
}
/* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+ for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
overflow = 1;
}
@@ -1239,7 +1425,7 @@ epilog:
* us. Claim we don't have anything pending. We'll
* adjust that if needed while exiting.
*/
- clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
+ clear_bit(vcpu_id, dist->irq_pending_on_cpu);
}
}
@@ -1261,17 +1447,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
- vgic_irq_clear_active(vcpu, vlr.irq);
+ vgic_irq_clear_queued(vcpu, vlr.irq);
WARN_ON(vlr.state & LR_STATE_MASK);
vlr.state = 0;
vgic_set_lr(vcpu, lr, vlr);
+ /*
+ * If the IRQ was EOIed it was also ACKed and we we
+ * therefore assume we can clear the soft pending
+ * state (should it had been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after
+ * the IRQ was acked, it actually shouldn't be
+ * cleared, but we have no way of knowing that unless
+ * we start trapping ACKs when the soft-pending state
+ * is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
/* Any additional pending interrupt? */
- if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
vgic_cpu_irq_set(vcpu, vlr.irq);
level_pending = true;
} else {
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
vgic_cpu_irq_clear(vcpu, vlr.irq);
}
@@ -1315,14 +1516,14 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
vlr = vgic_get_lr(vcpu, lr);
- BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+ BUG_ON(vlr.irq >= dist->nr_irqs);
vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
}
/* Check if we still have something up our sleeve... */
pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
if (level_pending || pending < vgic->nr_lr)
- set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+ set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1356,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
- return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+ return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1376,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm)
static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
{
- int is_edge = vgic_irq_is_edge(vcpu, irq);
- int state = vgic_dist_irq_is_pending(vcpu, irq);
+ int edge_triggered = vgic_irq_is_edge(vcpu, irq);
/*
* Only inject an interrupt if:
* - edge triggered and we have a rising edge
* - level triggered and we change level
*/
- if (is_edge)
+ if (edge_triggered) {
+ int state = vgic_dist_irq_is_pending(vcpu, irq);
return level > state;
- else
+ } else {
+ int state = vgic_dist_irq_get_level(vcpu, irq);
return level != state;
+ }
}
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int is_edge, is_level;
+ int edge_triggered, level_triggered;
int enabled;
bool ret = true;
spin_lock(&dist->lock);
vcpu = kvm_get_vcpu(kvm, cpuid);
- is_edge = vgic_irq_is_edge(vcpu, irq_num);
- is_level = !is_edge;
+ edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+ level_triggered = !edge_triggered;
if (!vgic_validate_injection(vcpu, irq_num, level)) {
ret = false;
@@ -1417,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
- if (level)
- vgic_dist_irq_set(vcpu, irq_num);
- else
- vgic_dist_irq_clear(vcpu, irq_num);
+ if (level) {
+ if (level_triggered)
+ vgic_dist_irq_set_level(vcpu, irq_num);
+ vgic_dist_irq_set_pending(vcpu, irq_num);
+ } else {
+ if (level_triggered) {
+ vgic_dist_irq_clear_level(vcpu, irq_num);
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ }
+ }
enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1429,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
goto out;
}
- if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+ if (!vgic_can_sample_irq(vcpu, irq_num)) {
/*
* Level interrupt in progress, will be picked up
* when EOId.
@@ -1440,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
if (level) {
vgic_cpu_irq_set(vcpu, irq_num);
- set_bit(cpuid, &dist->irq_pending_on_cpu);
+ set_bit(cpuid, dist->irq_pending_on_cpu);
}
out:
@@ -1466,7 +1678,8 @@ out:
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level)
{
- if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+ if (likely(vgic_initialized(kvm)) &&
+ vgic_update_irq_pending(kvm, cpuid, irq_num, level))
vgic_kick_vcpus(kvm);
return 0;
@@ -1483,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_cpu->pending_shared = NULL;
+ vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+ vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+ if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ kvm_vgic_vcpu_destroy(vcpu);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/**
* kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
* @vcpu: pointer to the vcpu struct
@@ -1490,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
* Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
* this vcpu and enable the VGIC for this VCPU
*/
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int i;
- if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
- return -EBUSY;
-
- for (i = 0; i < VGIC_NR_IRQS; i++) {
+ for (i = 0; i < dist->nr_irqs; i++) {
if (i < VGIC_NR_PPIS)
vgic_bitmap_set_irq_val(&dist->irq_enabled,
vcpu->vcpu_id, i, 1);
@@ -1518,8 +1754,113 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
vgic_cpu->nr_lr = vgic->nr_lr;
vgic_enable(vcpu);
+}
- return 0;
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ vgic_free_bitmap(&dist->irq_enabled);
+ vgic_free_bitmap(&dist->irq_level);
+ vgic_free_bitmap(&dist->irq_pending);
+ vgic_free_bitmap(&dist->irq_soft_pend);
+ vgic_free_bitmap(&dist->irq_queued);
+ vgic_free_bitmap(&dist->irq_cfg);
+ vgic_free_bytemap(&dist->irq_priority);
+ if (dist->irq_spi_target) {
+ for (i = 0; i < dist->nr_cpus; i++)
+ vgic_free_bitmap(&dist->irq_spi_target[i]);
+ }
+ kfree(dist->irq_sgi_sources);
+ kfree(dist->irq_spi_cpu);
+ kfree(dist->irq_spi_target);
+ kfree(dist->irq_pending_on_cpu);
+ dist->irq_sgi_sources = NULL;
+ dist->irq_spi_cpu = NULL;
+ dist->irq_spi_target = NULL;
+ dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int nr_cpus, nr_irqs;
+ int ret, i;
+
+ if (dist->nr_cpus) /* Already allocated */
+ return 0;
+
+ nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+ if (!nr_cpus) /* No vcpus? Can't be good... */
+ return -EINVAL;
+
+ /*
+ * If nobody configured the number of interrupts, use the
+ * legacy one.
+ */
+ if (!dist->nr_irqs)
+ dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+ nr_irqs = dist->nr_irqs;
+
+ ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+ ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+ dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+ dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+ GFP_KERNEL);
+ dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
+ if (!dist->irq_sgi_sources ||
+ !dist->irq_spi_cpu ||
+ !dist->irq_spi_target ||
+ !dist->irq_pending_on_cpu) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+ nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+ if (ret) {
+ kvm_err("VGIC: Failed to allocate vcpu memory\n");
+ break;
+ }
+ }
+
+ for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+ vgic_set_target_reg(kvm, 0, i);
+
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
+ return ret;
}
/**
@@ -1533,6 +1874,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
*/
int kvm_vgic_init(struct kvm *kvm)
{
+ struct kvm_vcpu *vcpu;
int ret = 0, i;
if (!irqchip_in_kernel(kvm))
@@ -1550,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
goto out;
}
+ ret = vgic_init_maps(kvm);
+ if (ret) {
+ kvm_err("Unable to allocate maps\n");
+ goto out;
+ }
+
ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
if (ret) {
@@ -1557,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
goto out;
}
- for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
- vgic_set_target_reg(kvm, 0, i);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_init(vcpu);
kvm->arch.vgic.ready = true;
out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
mutex_unlock(&kvm->lock);
return ret;
}
@@ -1613,7 +1963,7 @@ out:
return ret;
}
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
{
phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
@@ -1802,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ ret = vgic_init_maps(dev->kvm);
+ if (ret)
+ goto out;
+
if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
ret = -EINVAL;
goto out;
@@ -1899,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return vgic_attr_regs_access(dev, attr, &reg, true);
}
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 val;
+ int ret = 0;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ /*
+ * We require:
+ * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+ * - at most 1024 interrupts
+ * - a multiple of 32 interrupts
+ */
+ if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+ val > VGIC_MAX_IRQS ||
+ (val & 31))
+ return -EINVAL;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+ ret = -EBUSY;
+ else
+ dev->kvm->arch.vgic.nr_irqs = val;
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+ }
}
@@ -1935,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = put_user(reg, uaddr);
break;
}
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+ break;
+ }
}
@@ -1971,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
}
return -ENXIO;
}
@@ -2029,8 +2420,8 @@ static const struct of_device_id vgic_ids[] = {
int kvm_vgic_hyp_init(void)
{
const struct of_device_id *matched_id;
- int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
- const struct vgic_params **);
+ const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+ const struct vgic_params **);
struct device_node *vgic_node;
int ret;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1cf53ee0d28..39a02fbdb572 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1095,9 +1095,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
* If writable is set to false, the hva returned by this function is only
* allowed to be read.
*/
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+ gfn_t gfn, bool *writable)
{
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
if (!kvm_is_error_hva(hva) && writable)
@@ -1106,6 +1106,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return hva;
}
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
static int kvm_read_hva(void *data, void __user *hva, int len)
{
return __copy_from_user(data, hva, len);