From 5dfd486c4750c9278c63fa96e6e85bdd2fb58e9d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:35 -0800 Subject: x86, kvm: Fix kvm's use of __pa() on percpu areas In short, it is illegal to call __pa() on an address holding a percpu variable. This replaces those __pa() calls with slow_virt_to_phys(). All of the cases in this patch are in boot time (or CPU hotplug time at worst) code, so the slow pagetable walking in slow_virt_to_phys() is not expected to have a performance impact. The times when this actually matters are pretty obscure (certain 32-bit NUMA systems), but it _does_ happen. It is important to keep KVM guests working on these systems because the real hardware is getting harder and harder to find. This bug manifested first by me seeing a plain hang at boot after this message: CPU 0 irqstacks, hard=f3018000 soft=f301a000 or, sometimes, it would actually make it out to the console: [ 0.000000] BUG: unable to handle kernel paging request at ffffffff I eventually traced it down to the KVM async pagefault code. This can be worked around by disabling that code either at compile-time, or on the kernel command-line. The kvm async pagefault code was injecting page faults in to the guest which the guest misinterpreted because its "reason" was not being properly sent from the host. The guest passes a physical address of an per-cpu async page fault structure via an MSR to the host. Since __pa() is broken on percpu data, the physical address it sent was bascially bogus and the host went scribbling on random data. The guest never saw the real reason for the page fault (it was injected by the host), assumed that the kernel had taken a _real_ page fault, and panic()'d. The behavior varied, though, depending on what got corrupted by the bad write. Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212435.4905663F@kernel.stglabs.ibm.com Acked-by: Rik van Riel Reviewed-by: Marcelo Tosatti Signed-off-by: H. Peter Anvin --- arch/x86/kernel/kvm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9c2bd8bd4b4..aa7e58b82b3 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -297,9 +297,9 @@ static void kvm_register_steal_time(void) memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); + wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", - cpu, __pa(st)); + cpu, slow_virt_to_phys(st)); } static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; @@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void) return; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = __pa(&__get_cpu_var(apf_reason)); + u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; @@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void) /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); __get_cpu_var(kvm_apic_eoi) = 0; - pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; + pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) + | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } -- cgit v1.2.3 From 136867f517cbc3f8a91f035677911a6b503c3323 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 5 Feb 2013 19:57:22 -0700 Subject: x86/kvm: Fix compile warning in kvm_register_steal_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compile warning in kvm_register_steal_time(): CC arch/x86/kernel/kvm.o arch/x86/kernel/kvm.c: In function ‘kvm_register_steal_time’: arch/x86/kernel/kvm.c:302:3: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘phys_addr_t’ [-Wformat] Introduced via: 5dfd486c4750 x86, kvm: Fix kvm's use of __pa() on percpu areas d76565344512 x86, mm: Create slow_virt_to_phys() f3c4fbb68e93 x86, mm: Use new pagetable helpers in try_preserve_large_page() 4cbeb51b860c x86, mm: Pagetable level size/shift/mask helpers a25b9316841c x86, mm: Make DEBUG_VIRTUAL work earlier in boot Signed-off-by: Shuah Khan Acked-by: Gleb Natapov Cc: Marcelo Tosatti Cc: Dave Hansen Cc: Rik van Riel Cc: shuahkhan@gmail.com Cc: avi@redhat.com Cc: gleb@redhat.com Cc: mst@redhat.com Link: http://lkml.kernel.org/r/1360119442.8356.8.camel@lorien2 Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index aa7e58b82b3..9cec2025309 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -298,8 +298,8 @@ static void kvm_register_steal_time(void) memset(st, 0, sizeof(*st)); wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); - printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", - cpu, slow_virt_to_phys(st)); + pr_info("kvm-stealtime: cpu %d, msr %llx\n", + cpu, (unsigned long long) slow_virt_to_phys(st)); } static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; -- cgit v1.2.3