aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/eboot.c8
-rw-r--r--arch/x86/include/asm/preempt.h4
-rw-r--r--arch/x86/kernel/alternative.c5
-rw-r--r--arch/x86/kernel/apic/apic.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c5
-rw-r--r--arch/x86/kernel/crash.c7
-rw-r--r--arch/x86/kernel/entry_64.S16
-rw-r--r--arch/x86/kernel/paravirt.c16
-rw-r--r--arch/x86/kernel/process_64.c52
-rw-r--r--arch/x86/kernel/tsc.c17
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/platform/efi/efi.c67
-rw-r--r--arch/x86/xen/enlighten.c23
15 files changed, 199 insertions, 34 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0cdc154a22b5..4c3f76b425c1 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -667,6 +667,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u32 h = handles[i];
+ u32 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop32);
@@ -678,7 +679,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query32(gop32, &info, &size, &fb_base);
+ status = __gop_query32(gop32, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -692,6 +693,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -770,6 +772,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u64 h = handles[i];
+ u32 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop64);
@@ -781,7 +784,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query64(gop64, &info, &size, &fb_base);
+ status = __gop_query64(gop64, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -795,6 +798,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 8f3271842533..67b6cd00a44f 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(!raw_cpu_read_4(__preempt_count));
+ return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index aef653193160..d1918a8c4393 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -325,10 +325,15 @@ done:
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
+ unsigned long flags;
+
if (instr[0] != 0x90)
return;
+ local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ sync_core();
+ local_irq_restore(flags);
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index cde732c1b495..307a49828826 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);
if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+ /*
+ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+ * According to Intel, MFENCE can do the serialization here.
+ */
+ asm volatile("mfence" : : : "memory");
+
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2813ea0f142e..22212615a137 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2098,9 +2098,12 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = cpuc->event_constraint[idx];
+ struct event_constraint *c1 = NULL;
struct event_constraint *c2;
+ if (idx >= 0) /* fake does < 0 */
+ c1 = cpuc->event_constraint[idx];
+
/*
* first time only
* - static constraint: no change across incremental scheduling calls
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c76d3e37c6e1..403ace539b73 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -184,10 +184,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(unsigned long start_pfn,
- unsigned long nr_pfn, void *arg)
+static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
{
- int *nr_ranges = arg;
+ unsigned int *nr_ranges = arg;
(*nr_ranges)++;
return 0;
@@ -213,7 +212,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
ced->image = image;
- walk_system_ram_range(0, -1, &nr_ranges,
+ walk_system_ram_res(0, -1, &nr_ranges,
get_nr_ram_ranges_callback);
ced->max_nr_ranges = nr_ranges;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4bd6c197563d..6c9cb6073832 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1393,7 +1393,18 @@ END(error_exit)
/* Runs on exception stack */
ENTRY(nmi)
INTR_FRAME
+ /*
+ * Fix up the exception frame if we're on Xen.
+ * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
+ * one value to the stack on native, so it may clobber the rdx
+ * scratch slot, but it won't clobber any of the important
+ * slots past it.
+ *
+ * Xen is a different story, because the Xen frame itself overlaps
+ * the "NMI executing" variable.
+ */
PARAVIRT_ADJUST_EXCEPTION_FRAME
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1445,9 +1456,12 @@ ENTRY(nmi)
* we don't want to enable interrupts, because then we'll end
* up in an awkward situation in which IRQs are on but NMIs
* are off.
+ *
+ * We also must not push anything to the stack before switching
+ * stacks lest we corrupt the "NMI executing" variable.
*/
- SWAPGS
+ SWAPGS_UNSAFE_STACK
cld
movq %rsp, %rdx
movq PER_CPU_VAR(kernel_stack), %rsp
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c614dd492f5f..1f316f066c49 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -41,10 +41,18 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
-/* nop stub */
-void _paravirt_nop(void)
-{
-}
+/*
+ * nop stub, which must not clobber anything *including the stack* to
+ * avoid confusing the entry prologues.
+ */
+extern void _paravirt_nop(void);
+asm (".pushsection .entry.text, \"ax\"\n"
+ ".global _paravirt_nop\n"
+ "_paravirt_nop:\n\t"
+ "ret\n\t"
+ ".size _paravirt_nop, . - _paravirt_nop\n\t"
+ ".type _paravirt_nop, @function\n\t"
+ ".popsection");
/* identity function, which can be inlined */
u32 _paravirt_ident_32(u32 x)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5e0bf57d9944..58e02d938218 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -499,27 +499,59 @@ void set_personality_ia32(bool x32)
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
+/*
+ * Called from fs/proc with a reference on @p to find the function
+ * which called into schedule(). This needs to be done carefully
+ * because the task might wake up and we might look at a stack
+ * changing under us.
+ */
unsigned long get_wchan(struct task_struct *p)
{
- unsigned long stack;
- u64 fp, ip;
+ unsigned long start, bottom, top, sp, fp, ip;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
- stack = (unsigned long)task_stack_page(p);
- if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
+
+ start = (unsigned long)task_stack_page(p);
+ if (!start)
+ return 0;
+
+ /*
+ * Layout of the stack page:
+ *
+ * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+ * PADDING
+ * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+ * stack
+ * ----------- bottom = start + sizeof(thread_info)
+ * thread_info
+ * ----------- start
+ *
+ * The tasks stack pointer points at the location where the
+ * framepointer is stored. The data on the stack is:
+ * ... IP FP ... IP FP
+ *
+ * We need to read FP and IP, so we need to adjust the upper
+ * bound by another unsigned long.
+ */
+ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+ top -= 2 * sizeof(unsigned long);
+ bottom = start + sizeof(struct thread_info);
+
+ sp = READ_ONCE(p->thread.sp);
+ if (sp < bottom || sp > top)
return 0;
- fp = *(u64 *)(p->thread.sp);
+
+ fp = READ_ONCE(*(unsigned long *)sp);
do {
- if (fp < (unsigned long)stack ||
- fp >= (unsigned long)stack+THREAD_SIZE)
+ if (fp < bottom || fp > top)
return 0;
- ip = *(u64 *)(fp+8);
+ ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long)));
if (!in_sched_functions(ip))
return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
+ fp = READ_ONCE(*(unsigned long *)fp);
+ } while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 505449700e0c..21187ebee7d0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -21,6 +21,7 @@
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/geode.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1004,15 +1005,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static void __init check_system_tsc_reliable(void)
{
-#ifdef CONFIG_MGEODE_LX
- /* RTSC counts during suspend */
+#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+ if (is_geode_lx()) {
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
- unsigned long res_low, res_high;
+ unsigned long res_low, res_high;
- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
- /* Geode_LX - the OLPC CPU has a very reliable TSC */
- if (res_low & RTSC_SUSP)
- tsc_clocksource_reliable = 1;
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a very reliable TSC */
+ if (res_low & RTSC_SUSP)
+ tsc_clocksource_reliable = 1;
+ }
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4911bf19122b..7858cd9acfe4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -512,7 +512,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->vmcb->control.next_rip != 0) {
- WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+ WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2d73807f0d31..bc3041e1abbc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6144,6 +6144,8 @@ static __init int hardware_setup(void)
memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
+ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
if (enable_apicv) {
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3fba623e3ba5..f9977a7a9444 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1132,7 +1132,7 @@ void mark_rodata_ro(void)
* has been zapped already via cleanup_highmem().
*/
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
- set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 841ea05e1b02..477384985ac9 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -679,6 +679,70 @@ out:
}
/*
+ * Iterate the EFI memory map in reverse order because the regions
+ * will be mapped top-down. The end result is the same as if we had
+ * mapped things forward, but doesn't require us to change the
+ * existing implementation of efi_map_region().
+ */
+static inline void *efi_map_next_entry_reverse(void *entry)
+{
+ /* Initial call */
+ if (!entry)
+ return memmap.map_end - memmap.desc_size;
+
+ entry -= memmap.desc_size;
+ if (entry < memmap.map)
+ return NULL;
+
+ return entry;
+}
+
+/*
+ * efi_map_next_entry - Return the next EFI memory map descriptor
+ * @entry: Previous EFI memory map descriptor
+ *
+ * This is a helper function to iterate over the EFI memory map, which
+ * we do in different orders depending on the current configuration.
+ *
+ * To begin traversing the memory map @entry must be %NULL.
+ *
+ * Returns %NULL when we reach the end of the memory map.
+ */
+static void *efi_map_next_entry(void *entry)
+{
+ if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
+ /*
+ * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
+ * config table feature requires us to map all entries
+ * in the same order as they appear in the EFI memory
+ * map. That is to say, entry N must have a lower
+ * virtual address than entry N+1. This is because the
+ * firmware toolchain leaves relative references in
+ * the code/data sections, which are split and become
+ * separate EFI memory regions. Mapping things
+ * out-of-order leads to the firmware accessing
+ * unmapped addresses.
+ *
+ * Since we need to map things this way whether or not
+ * the kernel actually makes use of
+ * EFI_PROPERTIES_TABLE, let's just switch to this
+ * scheme by default for 64-bit.
+ */
+ return efi_map_next_entry_reverse(entry);
+ }
+
+ /* Initial call */
+ if (!entry)
+ return memmap.map;
+
+ entry += memmap.desc_size;
+ if (entry >= memmap.map_end)
+ return NULL;
+
+ return entry;
+}
+
+/*
* Map the efi memory ranges of the runtime services and update new_mmap with
* virtual addresses.
*/
@@ -688,7 +752,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
unsigned long left = 0;
efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ p = NULL;
+ while ((p = efi_map_next_entry(p))) {
md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a671e837228d..a10ed8915bf4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,10 @@
#include <linux/memblock.h>
#include <linux/edd.h>
+#ifdef CONFIG_KEXEC
+#include <linux/kexec.h>
+#endif
+
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
@@ -1798,6 +1802,21 @@ static struct notifier_block xen_hvm_cpu_notifier = {
.notifier_call = xen_hvm_cpu_notify,
};
+#ifdef CONFIG_KEXEC
+static void xen_hvm_shutdown(void)
+{
+ native_machine_shutdown();
+ if (kexec_in_progress)
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+
+static void xen_hvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_machine_crash_shutdown(regs);
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+#endif
+
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
@@ -1817,6 +1836,10 @@ static void __init xen_hvm_guest_init(void)
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
+#ifdef CONFIG_KEXEC
+ machine_ops.shutdown = xen_hvm_shutdown;
+ machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
+#endif
}
#endif