aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2015-05-15 15:23:50 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2015-05-15 15:23:50 +1000
commit2099118fd4a0cb7732d8fb253a039b08d901d3b0 (patch)
tree810961cba9f7dee714f24cd340047921b98dfa3d
parenta3529b73ac941b0c4a41e15cc18ed72a6d3e6047 (diff)
parentcba3d2765e9fcbce45fa3930c4d5f4da7652e752 (diff)
Merge remote-tracking branch 'kvm/linux-next'
-rw-r--r--Documentation/virtual/kvm/api.txt3
-rw-r--r--arch/arm/kvm/arm.c4
-rw-r--r--arch/mips/kvm/mips.c4
-rw-r--r--arch/powerpc/kvm/booke.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c2
-rw-r--r--arch/s390/include/asm/kvm_host.h4
-rw-r--r--arch/s390/kernel/entry.S2
-rw-r--r--arch/s390/kvm/intercept.c16
-rw-r--r--arch/s390/kvm/interrupt.c87
-rw-r--r--arch/s390/kvm/kvm-s390.c61
-rw-r--r--arch/s390/kvm/kvm-s390.h25
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/x86/include/asm/kvm_host.h17
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/kernel/kvmclock.c3
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/ioapic.c9
-rw-r--r--arch/x86/kvm/irq_comm.c14
-rw-r--r--arch/x86/kvm/lapic.c26
-rw-r--r--arch/x86/kvm/lapic.h8
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c32
-rw-r--r--arch/x86/kvm/vmx.c221
-rw-r--r--arch/x86/kvm/x86.c67
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--include/linux/kvm_host.h28
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--virt/kvm/kvm_main.c12
31 files changed, 477 insertions, 202 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9fa2bf8c3f6f..695544420ff2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -959,7 +959,8 @@ documentation when it pops into existence).
4.37 KVM_ENABLE_CAP
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
-Architectures: ppc, s390
+Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
+ mips (only KVM_CAP_ENABLE_CAP), ppc, s390
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
Parameters: struct kvm_enable_cap (in)
Returns: 0 on success; -1 on error
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d9631ecddd56..e41cb11f71b2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -553,13 +553,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Enter the guest
*/
trace_kvm_entry(*vcpu_pc(vcpu));
- kvm_guest_enter();
+ __kvm_guest_enter();
vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
- kvm_guest_exit();
+ __kvm_guest_exit();
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We may have taken a host interrupt in HYP mode (ie
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index bb68e8d520e8..71f345b499c8 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -393,7 +393,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_mips_deliver_interrupts(vcpu,
kvm_read_c0_guest_cause(vcpu->arch.cop0));
- kvm_guest_enter();
+ __kvm_guest_enter();
/* Disable hardware page table walking while in guest */
htw_stop();
@@ -403,7 +403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Re-enable HTW before enabling interrupts */
htw_start();
- kvm_guest_exit();
+ __kvm_guest_exit();
local_irq_enable();
if (vcpu->sigset_active)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6c1316a15a27..3872ab31c80a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1004,10 +1004,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
- local_irq_enable();
-
trace_kvm_exit(exit_nr, vcpu);
- kvm_guest_exit();
+ __kvm_guest_exit();
+
+ local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ac3ddf115f3d..8cd1f80fdc70 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
continue;
}
- kvm_guest_enter();
+ __kvm_guest_enter();
return 1;
}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d01fc588b5c3..444c412307cb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -80,6 +80,7 @@ struct sca_block {
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
#define CPUSTAT_G 0x00000008
#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
@@ -95,7 +96,8 @@ struct kvm_s390_sie_block {
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
__u8 reserved10[16]; /* 0x0010 */
-#define PROG_BLOCK_SIE 0x00000001
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
__u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 99b44acbfcc7..3238893c9d4f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1005,7 +1005,7 @@ ENTRY(sie64a)
.Lsie_gmap:
lg %r14,__SF_EMPTY(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
- tm __SIE_PROG20+3(%r14),1 # last exit...
+ tm __SIE_PROG20+3(%r14),3 # last exit...
jnz .Lsie_done
LPP __SF_EMPTY(%r15) # set guest id
sie 0(%r14)
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9e3779e3e496..7365e8a46032 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
-static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
-{
- int rc, rc2;
-
- vcpu->stat.exit_instr_and_program++;
- rc = handle_instruction(vcpu);
- rc2 = handle_prog(vcpu);
-
- if (rc == -EOPNOTSUPP)
- vcpu->arch.sie_block->icptcode = 0x04;
- if (rc)
- return rc;
- return rc2;
-}
-
/**
* handle_external_interrupt - used for external interruption interceptions
*
@@ -355,7 +340,6 @@ static const intercept_handler_t intercept_funcs[] = {
[0x00 >> 2] = handle_noop,
[0x04 >> 2] = handle_instruction,
[0x08 >> 2] = handle_prog,
- [0x0C >> 2] = handle_instruction_and_prog,
[0x10 >> 2] = handle_noop,
[0x14 >> 2] = handle_external_interrupt,
[0x18 >> 2] = handle_noop,
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0d3deef6edff..3c683e6e7d8d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -134,6 +134,8 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
active_mask = pending_local_irqs(vcpu);
active_mask |= pending_floating_irqs(vcpu);
+ if (!active_mask)
+ return 0;
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK;
@@ -941,12 +943,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
if (cpu_timer_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- do {
- irqs = deliverable_irqs(vcpu);
+ while ((irqs = deliverable_irqs(vcpu)) && !rc) {
/* bits are in the order of interrupt priority */
irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
- if (irq_type == IRQ_PEND_COUNT)
- break;
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -958,9 +957,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
rc = func(vcpu);
}
- if (rc)
- break;
- } while (!rc);
+ }
set_intercept_indicators(vcpu);
@@ -1061,7 +1058,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (sclp.has_sigpif)
return __inject_extcall_sigpif(vcpu, src_id);
- if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+ if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1340,12 +1337,54 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
return 0;
}
-static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+/*
+ * Find a destination VCPU for a floating irq and kick it.
+ */
+static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu;
+ int sigcpu, online_vcpus, nr_tries = 0;
+
+ online_vcpus = atomic_read(&kvm->online_vcpus);
+ if (!online_vcpus)
+ return;
+
+ /* find idle VCPUs first, then round robin */
+ sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+ if (sigcpu == online_vcpus) {
+ do {
+ sigcpu = fi->next_rr_cpu;
+ fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+ /* avoid endless loops if all vcpus are stopped */
+ if (nr_tries++ >= online_vcpus)
+ return;
+ } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+ }
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+
+ /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+ li = &dst_vcpu->arch.local_int;
+ spin_lock(&li->lock);
+ switch (type) {
+ case KVM_S390_MCHK:
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+ break;
+ default:
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ break;
+ }
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
struct kvm_s390_float_interrupt *fi;
- struct kvm_vcpu *dst_vcpu = NULL;
- int sigcpu;
u64 type = READ_ONCE(inti->type);
int rc;
@@ -1373,32 +1412,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
if (rc)
return rc;
- sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
- if (sigcpu == KVM_MAX_VCPUS) {
- do {
- sigcpu = fi->next_rr_cpu++;
- if (sigcpu == KVM_MAX_VCPUS)
- sigcpu = fi->next_rr_cpu = 0;
- } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
- }
- dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
- switch (type) {
- case KVM_S390_MCHK:
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
- break;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
- break;
- default:
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- break;
- }
- spin_unlock(&li->lock);
- kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+ __floating_irq_kick(kvm, type);
return 0;
-
}
int kvm_s390_inject_vm(struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c4e81b26c1b0..166d9cd32c9d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
- 0x005c800000000000UL,
+ 0x005e800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -454,10 +454,10 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
mutex_lock(&kvm->lock);
kvm->arch.epoch = gtod - host_tod;
- kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+ kvm_s390_vcpu_block_all(kvm);
+ kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
- exit_sie(cur_vcpu);
- }
+ kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
return 0;
}
@@ -1311,8 +1311,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED |
- CPUSTAT_GED);
+ CPUSTAT_STOPPED);
+
+ if (test_kvm_facility(vcpu->kvm, 78))
+ atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ else if (test_kvm_facility(vcpu->kvm, 8))
+ atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+
kvm_s390_vcpu_setup_model(vcpu);
vcpu->arch.sie_block->ecb = 6;
@@ -1409,16 +1414,26 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
-void s390_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
+static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+}
+
+static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+}
+
/*
* Kick a guest cpu out of SIE and wait until SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
@@ -1430,10 +1445,11 @@ void exit_sie(struct kvm_vcpu *vcpu)
cpu_relax();
}
-/* Kick a guest cpu out of SIE and prevent SIE-reentry */
-void exit_sie_sync(struct kvm_vcpu *vcpu)
+/* Kick a guest cpu out of SIE to process a request synchronously */
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
- s390_vcpu_block(vcpu);
+ kvm_make_request(req, vcpu);
+ kvm_s390_vcpu_request(vcpu);
exit_sie(vcpu);
}
@@ -1447,8 +1463,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
/* match against both prefix pages */
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
}
}
}
@@ -1720,8 +1735,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
+ if (!vcpu->requests)
+ return 0;
retry:
- s390_vcpu_unblock(vcpu);
+ kvm_s390_vcpu_request_handled(vcpu);
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -1993,12 +2010,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
*/
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
+ local_irq_disable();
+ __kvm_guest_enter();
+ local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
- kvm_guest_exit();
+ local_irq_disable();
+ __kvm_guest_exit();
+ local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
@@ -2206,8 +2225,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
@@ -2223,8 +2241,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ca108b90ae56..c5704786e473 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -211,10 +211,10 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
-void s390_vcpu_block(struct kvm_vcpu *vcpu);
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
-void exit_sie_sync(struct kvm_vcpu *vcpu);
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
/* is cmma enabled */
@@ -228,6 +228,25 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info);
+static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ WARN_ON(!mutex_is_locked(&kvm->lock));
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_block(vcpu);
+}
+
+static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_unblock(vcpu);
+}
+
/**
* kvm_s390_inject_prog_cond - conditionally inject a program check
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d22d8ee1ff9d..ad4242245771 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -698,10 +698,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
case 0x00001000:
end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
break;
- /* We dont support EDAT2
case 0x00002000:
+ /* only support 2G frame size if EDAT2 is available and we are
+ not in 24-bit addressing mode */
+ if (!test_kvm_facility(vcpu->kvm, 78) ||
+ psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
- break;*/
+ break;
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e962e3..8b661d1946b5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -635,6 +635,8 @@ struct kvm_arch {
#endif
bool boot_vcpu_runs_old_kvmclock;
+
+ u64 disabled_quirks;
};
struct kvm_vm_stat {
@@ -687,12 +689,13 @@ struct msr_data {
struct kvm_lapic_irq {
u32 vector;
- u32 delivery_mode;
- u32 dest_mode;
- u32 level;
- u32 trig_mode;
+ u16 delivery_mode;
+ u16 dest_mode;
+ bool level;
+ u16 trig_mode;
u32 shorthand;
u32 dest_id;
+ bool msi_redir_hint;
};
struct kvm_x86_ops {
@@ -709,7 +712,7 @@ struct kvm_x86_ops {
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
- void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+ void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@@ -999,7 +1002,7 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
-int fx_init(struct kvm_vcpu *vcpu);
+int fx_init(struct kvm_vcpu *vcpu, bool init_event);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
@@ -1143,7 +1146,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address);
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..628954ceede1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -86,7 +86,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
offset = pvclock_get_nsec_offset(src);
ret = src->system_time + offset;
ret_flags = src->flags;
- rdtsc_barrier();
*cycles = ret;
*flags = ret_flags;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d7dcef58aefa..2fec75e4b1e1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -345,4 +345,7 @@ struct kvm_xcrs {
struct kvm_sync_regs {
};
+#define KVM_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_QUIRK_CD_NW_CLEARED (1 << 1)
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 42caaef897c8..4e03921761c4 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,6 +24,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/memblock.h>
+#include <linux/sched.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -265,6 +266,8 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+ set_sched_clock_stable();
}
int __init kvm_setup_vsyscall_timeinfo(void)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 630bcb0d7a04..5839fc56cb3e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3840,7 +3840,7 @@ static const struct opcode group5[] = {
F(DstMem | SrcNone | Lock, em_inc),
F(DstMem | SrcNone | Lock, em_dec),
I(SrcMem | NearBranch, em_call_near_abs),
- I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
+ I(SrcMemFAddr | ImplicitOps, em_call_far),
I(SrcMem | NearBranch, em_jmp_abs),
I(SrcMemFAddr | ImplicitOps, em_jmp_far),
I(SrcMem | Stack, em_push), D(Undefined),
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 28146f03c514..856f79105bb5 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -349,6 +349,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.delivery_mode = entry->fields.delivery_mode << 8;
irqe.level = 1;
irqe.shorthand = 0;
+ irqe.msi_redir_hint = false;
if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
ioapic->irr_delivered |= 1 << irq;
@@ -637,11 +638,9 @@ void kvm_ioapic_destroy(struct kvm *kvm)
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
cancel_delayed_work_sync(&ioapic->eoi_inject);
- if (ioapic) {
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
- kvm->arch.vioapic = NULL;
- kfree(ioapic);
- }
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ kvm->arch.vioapic = NULL;
+ kfree(ioapic);
}
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 72298b3ac025..9efff9e5b58c 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -31,6 +31,8 @@
#include "ioapic.h"
+#include "lapic.h"
+
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@@ -48,11 +50,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
line_status);
}
-inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
-{
- return irq->delivery_mode == APIC_DM_LOWEST;
-}
-
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map)
{
@@ -60,7 +57,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_vcpu *vcpu, *lowest = NULL;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
- kvm_is_dm_lowest_prio(irq)) {
+ kvm_lowest_prio_delivery(irq)) {
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
irq->delivery_mode = APIC_DM_FIXED;
}
@@ -76,7 +73,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
irq->dest_id, irq->dest_mode))
continue;
- if (!kvm_is_dm_lowest_prio(irq)) {
+ if (!kvm_lowest_prio_delivery(irq)) {
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
@@ -106,9 +103,10 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
irq->delivery_mode = e->msi.data & 0x700;
+ irq->msi_redir_hint = ((e->msi.address_lo
+ & MSI_ADDR_REDIRECTION_LOWPRI) > 0);
irq->level = 1;
irq->shorthand = 0;
- /* TODO Deal with RH bit of MSI message address */
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 629af0f1c5c4..dc5b57bb1f76 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -728,7 +728,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
dst = map->logical_map[cid];
- if (irq->delivery_mode == APIC_DM_LOWEST) {
+ if (kvm_lowest_prio_delivery(irq)) {
int l = -1;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
@@ -914,9 +914,10 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.vector = icr_low & APIC_VECTOR_MASK;
irq.delivery_mode = icr_low & APIC_MODE_MASK;
irq.dest_mode = icr_low & APIC_DEST_MASK;
- irq.level = icr_low & APIC_INT_ASSERT;
+ irq.level = (icr_low & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
irq.shorthand = icr_low & APIC_SHORT_MASK;
+ irq.msi_redir_hint = false;
if (apic_x2apic_mode(apic))
irq.dest_id = icr_high;
else
@@ -926,10 +927,11 @@ static void apic_send_ipi(struct kvm_lapic *apic)
apic_debug("icr_high 0x%x, icr_low 0x%x, "
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
- "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+ "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
+ "msi_redir_hint 0x%x\n",
icr_high, icr_low, irq.shorthand, irq.dest_id,
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
- irq.vector);
+ irq.vector, irq.msi_redir_hint);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
@@ -1557,7 +1559,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
-void kvm_lapic_reset(struct kvm_vcpu *vcpu)
+void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_lapic *apic;
int i;
@@ -1571,14 +1573,16 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
- kvm_apic_set_id(apic, vcpu->vcpu_id);
+ if (!init_event)
+ kvm_apic_set_id(apic, vcpu->vcpu_id);
kvm_apic_set_version(apic->vcpu);
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic->lapic_timer.timer_mode = 0;
- apic_set_reg(apic, APIC_LVT0,
- SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
+ if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+ apic_set_reg(apic, APIC_LVT0,
+ SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_set_reg(apic, APIC_DFR, 0xffffffffU);
apic_set_spiv(apic, 0xff);
@@ -1712,7 +1716,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
- kvm_lapic_reset(vcpu);
+ kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2046,8 +2050,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
- kvm_lapic_reset(vcpu);
- kvm_vcpu_reset(vcpu);
+ kvm_lapic_reset(vcpu, true);
+ kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 9d28383fc1e7..71b150cae5f9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
-void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
@@ -153,6 +153,12 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
return vcpu->arch.apic->pending_events;
}
+static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
+{
+ return (irq->delivery_mode == APIC_DM_LOWEST ||
+ irq->msi_redir_hint);
+}
+
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void wait_lapic_expire(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d43867c33bc4..209fe1477465 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4238,7 +4238,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
- mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
+ mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7d65637c851..06eb2fc1bab8 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -166,6 +166,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ WARN_ON(pfec & PFERR_RSVD_MASK);
+
return (mmu->permissions[index] >> pte_access) & 1;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c867b25a..6e6d115fe9b5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID))
return r;
+
+ /*
+ * page fault with PFEC.RSVD = 1 is caused by shadow
+ * page fault, should not be used to walk guest page
+ * table.
+ */
+ error_code &= ~PFERR_RSVD_MASK;
};
r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8650f6..8954d7a07703 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1082,7 +1082,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
-static void init_vmcb(struct vcpu_svm *svm)
+static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1153,17 +1153,17 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- svm_set_efer(&svm->vcpu, 0);
+ if (!init_event)
+ svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, 2);
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
/*
- * This is the guest-visible cr0 value.
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
+ * It also updates the guest-visible cr0 value.
*/
- svm->vcpu.arch.cr0 = 0;
(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
save->cr4 = X86_CR4_PAE;
@@ -1176,7 +1176,7 @@ static void init_vmcb(struct vcpu_svm *svm)
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
- save->g_pat = 0x0007040600070406ULL;
+ save->g_pat = svm->vcpu.arch.pat;
save->cr3 = 0;
save->cr4 = 0;
}
@@ -1195,13 +1195,19 @@ static void init_vmcb(struct vcpu_svm *svm)
enable_gif(svm);
}
-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 dummy;
u32 eax = 1;
- init_vmcb(svm);
+ if (!init_event) {
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
+ svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ }
+ init_vmcb(svm, init_event);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1257,12 +1263,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- init_vmcb(svm);
-
- svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
- svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ init_vmcb(svm, false);
svm_init_osvw(&svm->vcpu);
@@ -1575,7 +1576,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* does not do it - this results in some delay at
* reboot
*/
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
+ cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
@@ -1883,7 +1885,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
* so reinitialize it.
*/
clear_page(svm->vmcb);
- init_vmcb(svm);
+ init_vmcb(svm, false);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b61687bd79..5e8d41bccc26 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2170,8 +2170,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested;
- else if (irqchip_in_kernel(vcpu->kvm) &&
- apic_x2apic_mode(vcpu->arch.apic)) {
+ else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
@@ -4667,16 +4666,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- u32 msr_low, msr_high;
- u64 host_pat;
- rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
- host_pat = msr_low | ((u64) msr_high << 32);
- /* Write the default value follow host pat */
- vmcs_write64(GUEST_IA32_PAT, host_pat);
- /* Keep arch.pat sync with GUEST_IA32_PAT */
- vmx->vcpu.arch.pat = host_pat;
- }
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+ vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
u32 index = vmx_msr_index[i];
@@ -4708,22 +4699,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
return 0;
}
-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct msr_data apic_base_msr;
+ u64 cr0;
vmx->rmode.vm86_active = 0;
vmx->soft_vnmi_blocked = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
- kvm_set_cr8(&vmx->vcpu, 0);
- apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
- apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
- apic_base_msr.host_initiated = true;
- kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
+ kvm_set_cr8(vcpu, 0);
+
+ if (!init_event) {
+ apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(vcpu))
+ apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+ apic_base_msr.host_initiated = true;
+ kvm_set_apic_base(vcpu, &apic_base_msr);
+ }
vmx_segment_cache_clear(vmx);
@@ -4747,9 +4743,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
- vmcs_write32(GUEST_SYSENTER_CS, 0);
- vmcs_writel(GUEST_SYSENTER_ESP, 0);
- vmcs_writel(GUEST_SYSENTER_EIP, 0);
+ if (!init_event) {
+ vmcs_write32(GUEST_SYSENTER_CS, 0);
+ vmcs_writel(GUEST_SYSENTER_ESP, 0);
+ vmcs_writel(GUEST_SYSENTER_EIP, 0);
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ }
vmcs_writel(GUEST_RFLAGS, 0x02);
kvm_rip_write(vcpu, 0xfff0);
@@ -4764,18 +4763,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- /* Special registers */
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
-
setup_msrs(vmx);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
- if (cpu_has_vmx_tpr_shadow()) {
+ if (cpu_has_vmx_tpr_shadow() && !init_event) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
- if (vm_need_tpr_shadow(vmx->vcpu.kvm))
+ if (vm_need_tpr_shadow(vcpu->kvm))
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
- __pa(vmx->vcpu.arch.apic->regs));
+ __pa(vcpu->arch.apic->regs));
vmcs_write32(TPR_THRESHOLD, 0);
}
@@ -4787,12 +4783,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
if (vmx->vpid != 0)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
- vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
- vmx_set_cr4(&vmx->vcpu, 0);
- vmx_set_efer(&vmx->vcpu, 0);
- vmx_fpu_activate(&vmx->vcpu);
- update_exception_bitmap(&vmx->vcpu);
+ cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+ vmx_set_cr0(vcpu, cr0); /* enter rmode */
+ vmx->vcpu.arch.cr0 = cr0;
+ vmx_set_cr4(vcpu, 0);
+ if (!init_event)
+ vmx_set_efer(vcpu, 0);
+ vmx_fpu_activate(vcpu);
+ update_exception_bitmap(vcpu);
vpid_sync_context(vmx);
}
@@ -7691,6 +7689,158 @@ static void kvm_flush_pml_buffers(struct kvm *kvm)
kvm_vcpu_kick(vcpu);
}
+static void vmx_dump_sel(char *name, uint32_t sel)
+{
+ pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
+ name, vmcs_read32(sel),
+ vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+ vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+ vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
+}
+
+static void vmx_dump_dtsel(char *name, uint32_t limit)
+{
+ pr_err("%s limit=0x%08x, base=0x%016lx\n",
+ name, vmcs_read32(limit),
+ vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+}
+
+static void dump_vmcs(void)
+{
+ u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
+ u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
+ u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+ u32 secondary_exec_control = 0;
+ unsigned long cr4 = vmcs_readl(GUEST_CR4);
+ u64 efer = vmcs_readl(GUEST_IA32_EFER);
+ int i, n;
+
+ if (cpu_has_secondary_exec_ctrls())
+ secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+ pr_err("*** Guest State ***\n");
+ pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+ vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
+ vmcs_readl(CR0_GUEST_HOST_MASK));
+ pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+ cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
+ pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
+ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
+ (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
+ {
+ pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n",
+ vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
+ pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n",
+ vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
+ }
+ pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
+ vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
+ pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
+ vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
+ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+ vmcs_readl(GUEST_SYSENTER_ESP),
+ vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
+ vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
+ vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
+ vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
+ vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
+ vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
+ vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
+ vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
+ vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
+ vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
+ vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
+ if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
+ (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
+ pr_err("EFER = 0x%016llx PAT = 0x%016lx\n",
+ efer, vmcs_readl(GUEST_IA32_PAT));
+ pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n",
+ vmcs_readl(GUEST_IA32_DEBUGCTL),
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
+ if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+ pr_err("PerfGlobCtl = 0x%016lx\n",
+ vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
+ if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
+ pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
+ pr_err("Interruptibility = %08x ActivityState = %08x\n",
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
+ vmcs_read32(GUEST_ACTIVITY_STATE));
+ if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+ pr_err("InterruptStatus = %04x\n",
+ vmcs_read16(GUEST_INTR_STATUS));
+
+ pr_err("*** Host State ***\n");
+ pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
+ vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
+ pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
+ vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
+ vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
+ vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
+ vmcs_read16(HOST_TR_SELECTOR));
+ pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
+ vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
+ vmcs_readl(HOST_TR_BASE));
+ pr_err("GDTBase=%016lx IDTBase=%016lx\n",
+ vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
+ pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
+ vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
+ vmcs_readl(HOST_CR4));
+ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+ vmcs_readl(HOST_IA32_SYSENTER_ESP),
+ vmcs_read32(HOST_IA32_SYSENTER_CS),
+ vmcs_readl(HOST_IA32_SYSENTER_EIP));
+ if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
+ pr_err("EFER = 0x%016lx PAT = 0x%016lx\n",
+ vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
+ if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ pr_err("PerfGlobCtl = 0x%016lx\n",
+ vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ pr_err("*** Control State ***\n");
+ pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
+ pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
+ pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
+ pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
+ vmcs_read32(EXCEPTION_BITMAP),
+ vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
+ vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
+ pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
+ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+ vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
+ vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
+ pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
+ vmcs_read32(VM_EXIT_INTR_INFO),
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+ vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
+ pr_err(" reason=%08x qualification=%016lx\n",
+ vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
+ pr_err("IDTVectoring: info=%08x errcode=%08x\n",
+ vmcs_read32(IDT_VECTORING_INFO_FIELD),
+ vmcs_read32(IDT_VECTORING_ERROR_CODE));
+ pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
+ if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
+ pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+ if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
+ pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
+ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
+ pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
+ n = vmcs_read32(CR3_TARGET_COUNT);
+ for (i = 0; i + 1 < n; i += 4)
+ pr_err("CR3 target%u=%016lx target%u=%016lx\n",
+ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
+ i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
+ if (i < n)
+ pr_err("CR3 target%u=%016lx\n",
+ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
+ if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+ pr_err("PLE Gap=%08x Window=%08x\n",
+ vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
+ if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+ pr_err("Virtual processor ID = 0x%04x\n",
+ vmcs_read16(VIRTUAL_PROCESSOR_ID));
+}
+
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -7723,6 +7873,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+ dump_vmcs();
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= exit_reason;
@@ -8924,7 +9075,7 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
struct vmx_msr_entry *e)
{
/* x2APIC MSR accesses are not allowed */
- if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
+ if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
return -EINVAL;
if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
e->index == MSR_IA32_UCODE_REV)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd03e29..cdccbe1749a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2800,6 +2800,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_TIME:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
+ case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_DISABLE_QUIRKS:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -3847,6 +3849,26 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
return 0;
}
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_DISABLE_QUIRKS:
+ kvm->arch.disabled_quirks = cap->args[0];
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4099,7 +4121,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ goto out;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
default:
r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
}
@@ -5954,6 +5984,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
lapic_irq.shorthand = 0;
lapic_irq.dest_mode = 0;
lapic_irq.dest_id = apicid;
+ lapic_irq.msi_redir_hint = false;
lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
@@ -6347,7 +6378,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu);
- kvm_guest_enter();
+ __kvm_guest_enter();
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -7003,7 +7034,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
-int fx_init(struct kvm_vcpu *vcpu)
+int fx_init(struct kvm_vcpu *vcpu, bool init_event)
{
int err;
@@ -7011,7 +7042,9 @@ int fx_init(struct kvm_vcpu *vcpu)
if (err)
return err;
- fpu_finit(&vcpu->arch.guest_fpu);
+ if (!init_event)
+ fpu_finit(&vcpu->arch.guest_fpu);
+
if (cpu_has_xsaves)
vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
@@ -7053,14 +7086,25 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
kvm_put_guest_xcr0(vcpu);
- if (!vcpu->guest_fpu_loaded)
+ if (!vcpu->guest_fpu_loaded) {
+ vcpu->fpu_counter = 0;
return;
+ }
vcpu->guest_fpu_loaded = 0;
fpu_save_init(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+ /*
+ * If using eager FPU mode, or if the guest is a frequent user
+ * of the FPU, just leave the FPU active for next time.
+ * Every 255 times fpu_counter rolls over to 0; a guest that uses
+ * the FPU in bursts will revert to loading it on demand.
+ */
+ if (!use_eager_fpu()) {
+ if (++vcpu->fpu_counter < 5)
+ kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+ }
trace_kvm_fpu(0);
}
@@ -7091,7 +7135,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
if (r)
return r;
- kvm_vcpu_reset(vcpu);
+ kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
@@ -7129,7 +7173,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_free(vcpu);
}
-void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
@@ -7156,13 +7200,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- kvm_pmu_reset(vcpu);
+ if (!init_event)
+ kvm_pmu_reset(vcpu);
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
- kvm_x86_ops->vcpu_reset(vcpu);
+ kvm_x86_ops->vcpu_reset(vcpu, init_event);
}
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@ -7351,7 +7396,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_mce_banks;
}
- r = fx_init(vcpu);
+ r = fx_init(vcpu, false);
if (r)
goto fail_free_wbinvd_dirty_mask;
@@ -7363,6 +7408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
+
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f5fef1868096..01a1d011e073 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -4,6 +4,8 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
+#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
+
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.exception.pending = false;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054309a0..b7a08cd6f4a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -230,6 +230,7 @@ struct kvm_vcpu {
int fpu_active;
int guest_fpu_loaded, guest_xcr0_loaded;
+ unsigned char fpu_counter;
wait_queue_head_t wq;
struct pid *pid;
int sigset_active;
@@ -762,16 +763,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
}
#endif
-static inline void kvm_guest_enter(void)
+/* must be called with irqs disabled */
+static inline void __kvm_guest_enter(void)
{
- unsigned long flags;
-
- BUG_ON(preemptible());
-
- local_irq_save(flags);
guest_enter();
- local_irq_restore(flags);
-
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspace from rcu point of view. In
@@ -783,12 +778,27 @@ static inline void kvm_guest_enter(void)
rcu_virt_note_context_switch(smp_processor_id());
}
+/* must be called with irqs disabled */
+static inline void __kvm_guest_exit(void)
+{
+ guest_exit();
+}
+
+static inline void kvm_guest_enter(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __kvm_guest_enter();
+ local_irq_restore(flags);
+}
+
static inline void kvm_guest_exit(void)
{
unsigned long flags;
local_irq_save(flags);
- guest_exit();
+ __kvm_guest_exit();
local_irq_restore(flags);
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b60056776d1..75bd9f7fd846 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -814,6 +814,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_INJECT_IRQ 113
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
+#define KVM_CAP_DISABLE_QUIRKS 116
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 90977418aeb6..f202c4035134 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1590,15 +1590,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
int offset, int len)
{
int r;
+ struct kvm_memory_slot *memslot;
unsigned long addr;
- addr = gfn_to_hva(kvm, gfn);
+ memslot = gfn_to_memslot(kvm, gfn);
+ addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty(kvm, gfn);
+ mark_page_dirty_in_slot(kvm, memslot, gfn);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -2882,18 +2884,12 @@ static int hardware_enable_all(void)
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
void *v)
{
- int cpu = (long)v;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
- pr_info("kvm: disabling virtualization on CPU%d\n",
- cpu);
hardware_disable();
break;
case CPU_STARTING:
- pr_info("kvm: enabling virtualization on CPU%d\n",
- cpu);
hardware_enable();
break;
}