aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/Makefile6
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/eboot.c146
-rw-r--r--arch/x86/boot/compressed/eboot.h52
-rw-r--r--arch/x86/boot/compressed/head_32.S14
-rw-r--r--arch/x86/boot/compressed/head_64.S9
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S29
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c14
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/bootparam_utils.h4
-rw-r--r--arch/x86/include/asm/checksum_32.h22
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/fpu-internal.h13
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/mce.h13
-rw-r--r--arch/x86/include/asm/mmu_context.h20
-rw-r--r--arch/x86/include/asm/pgtable.h11
-rw-r--r--arch/x86/include/asm/ptrace.h16
-rw-r--r--arch/x86/include/asm/spinlock.h4
-rw-r--r--arch/x86/include/asm/topology.h3
-rw-r--r--arch/x86/include/asm/xen/page.h31
-rw-r--r--arch/x86/include/asm/xor_avx.h4
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/acpi/sleep.c18
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c10
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c21
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c53
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c4
-rw-r--r--arch/x86/kernel/e820.c5
-rw-r--r--arch/x86/kernel/early-quirks.c15
-rw-r--r--arch/x86/kernel/entry_32.S19
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/ftrace.c95
-rw-r--r--arch/x86/kernel/head_32.S7
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/i387.c17
-rw-r--r--arch/x86/kernel/ldt.c13
-rw-r--r--arch/x86/kernel/microcode_amd.c2
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/setup.c25
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/kvm/emulate.c5
-rw-r--r--arch/x86/kvm/i8254.c18
-rw-r--r--arch/x86/kvm/lapic.c110
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c12
-rw-r--r--arch/x86/kvm/paging_tmpl.h8
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c13
-rw-r--r--arch/x86/kvm/x86.c45
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lib/csum-wrappers_64.c12
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/ioremap.c26
-rw-r--r--arch/x86/mm/mmap.c6
-rw-r--r--arch/x86/net/bpf_jit.S2
-rw-r--r--arch/x86/net/bpf_jit_comp.c14
-rw-r--r--arch/x86/platform/efi/efi.c169
-rw-r--r--arch/x86/realmode/rm/Makefile3
-rw-r--r--arch/x86/syscalls/syscall_64.tbl6
-rw-r--r--arch/x86/vdso/vdso32-setup.c8
-rw-r--r--arch/x86/xen/p2m.c10
-rw-r--r--arch/x86/xen/setup.c22
-rw-r--r--arch/x86/xen/smp.c20
-rw-r--r--arch/x86/xen/time.c17
82 files changed, 806 insertions, 596 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c477260294f..412189d2bff9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -31,6 +31,9 @@ ifeq ($(CONFIG_X86_32),y)
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
+ # Don't autogenerate MMX or SSE instructions
+ KBUILD_CFLAGS += -mno-mmx -mno-sse
+
# Never want PIC in a 32-bit kernel, prevent breakage with GCC built
# with nonstandard options
KBUILD_CFLAGS += -fno-pic
@@ -57,8 +60,11 @@ else
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ # Don't autogenerate MMX or SSE instructions
+ KBUILD_CFLAGS += -mno-mmx -mno-sse
+
# Use -mpreferred-stack-boundary=3 if supported.
- KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+ KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814bc41e3..6cf0111783d3 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -53,18 +53,18 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
# How to compile the 16-bit code. Note we always compile for -march=i386,
# that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS := $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
+KBUILD_CFLAGS := $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \
-DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/code16gcc.h \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+ -mno-mmx -mno-sse \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
- $(call cc-option, -fno-unit-at-a-time)) \
+ $(call cc-option, -fno-unit-at-a-time)) \
$(call cc-option, -fno-stack-protector) \
$(call cc-option, -mpreferred-stack-boundary=2)
-KBUILD_CFLAGS += $(call cc-option, -m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 5ef205c5f37b..7194d9f094bc 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -12,6 +12,7 @@ KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += -mno-mmx -mno-sse
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c205035a6b96..17177e80d466 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -865,6 +865,9 @@ fail:
* Because the x86 boot code expects to be passed a boot_params we
* need to create one ourselves (usually the bootloader would create
* one for us).
+ *
+ * The caller is responsible for filling out ->code32_start in the
+ * returned boot_params.
*/
struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
{
@@ -875,14 +878,15 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
struct efi_info *efi;
efi_loaded_image_t *image;
void *options;
- u32 load_options_size;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
- unsigned long cmdline;
+ char *cmdline_ptr;
u16 *s2;
u8 *s1;
int i;
+ unsigned long ramdisk_addr;
+ unsigned long ramdisk_size;
sys_table = _table;
@@ -893,13 +897,14 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
return NULL;
}
- status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+ status = efi_low_alloc(sys_table, 0x4000, 1,
+ (unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc lowmem for boot params\n");
+ efi_printk(sys_table, "Failed to alloc lowmem for boot params\n");
return NULL;
}
@@ -921,45 +926,13 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
hdr->vid_mode = 0xffff;
hdr->boot_flag = 0xAA55;
- hdr->code32_start = (__u64)(unsigned long)image->image_base;
-
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- options = image->load_options;
- load_options_size = image->load_options_size / 2; /* ASCII */
- cmdline = 0;
- s2 = (u16 *)options;
-
- if (s2) {
- while (*s2 && *s2 != '\n' && options_size < load_options_size) {
- s2++;
- options_size++;
- }
-
- if (options_size) {
- if (options_size > hdr->cmdline_size)
- options_size = hdr->cmdline_size;
-
- options_size++; /* NUL termination */
-
- status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for cmdline\n");
- goto fail;
- }
-
- s1 = (u8 *)(unsigned long)cmdline;
- s2 = (u16 *)options;
-
- for (i = 0; i < options_size - 1; i++)
- *s1++ = *s2++;
-
- *s1 = '\0';
- }
- }
-
- hdr->cmd_line_ptr = cmdline;
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
+ if (!cmdline_ptr)
+ goto fail;
+ hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
@@ -969,16 +942,20 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
memset(sdt, 0, sizeof(*sdt));
- status = handle_ramdisks(image, hdr);
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", hdr->initrd_addr_max,
+ &ramdisk_addr, &ramdisk_size);
if (status != EFI_SUCCESS)
goto fail2;
+ hdr->ramdisk_image = ramdisk_addr;
+ hdr->ramdisk_size = ramdisk_size;
return boot_params;
fail2:
- if (options_size)
- low_free(options_size, hdr->cmd_line_ptr);
+ efi_free(sys_table, options_size, hdr->cmd_line_ptr);
fail:
- low_free(0x4000, (unsigned long)boot_params);
+ efi_free(sys_table, 0x4000, (unsigned long)boot_params);
return NULL;
}
@@ -992,22 +969,24 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
efi_memory_desc_t *mem_map;
efi_status_t status;
__u32 desc_version;
+ bool called_exit = false;
u8 nr_entries;
int i;
size = sizeof(*mem_map) * 32;
again:
- size += sizeof(*mem_map);
+ size += sizeof(*mem_map) * 2;
_size = size;
- status = low_alloc(size, 1, (unsigned long *)&mem_map);
+ status = efi_low_alloc(sys_table, size, 1, (unsigned long *)&mem_map);
if (status != EFI_SUCCESS)
return status;
+get_map:
status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
mem_map, &key, &desc_size, &desc_version);
if (status == EFI_BUFFER_TOO_SMALL) {
- low_free(_size, (unsigned long)mem_map);
+ efi_free(sys_table, _size, (unsigned long)mem_map);
goto again;
}
@@ -1029,8 +1008,20 @@ again:
/* Might as well exit boot services now */
status = efi_call_phys2(sys_table->boottime->exit_boot_services,
handle, key);
- if (status != EFI_SUCCESS)
- goto free_mem_map;
+ if (status != EFI_SUCCESS) {
+ /*
+ * ExitBootServices() will fail if any of the event
+ * handlers change the memory map. In which case, we
+ * must be prepared to retry, but only once so that
+ * we're guaranteed to exit on repeated failures instead
+ * of spinning forever.
+ */
+ if (called_exit)
+ goto free_mem_map;
+
+ called_exit = true;
+ goto get_map;
+ }
/* Historic? */
boot_params->alt_mem_k = 32 * 1024;
@@ -1097,44 +1088,10 @@ again:
return EFI_SUCCESS;
free_mem_map:
- low_free(_size, (unsigned long)mem_map);
+ efi_free(sys_table, _size, (unsigned long)mem_map);
return status;
}
-static efi_status_t relocate_kernel(struct setup_header *hdr)
-{
- unsigned long start, nr_pages;
- efi_status_t status;
-
- /*
- * The EFI firmware loader could have placed the kernel image
- * anywhere in memory, but the kernel has various restrictions
- * on the max physical address it can run at. Attempt to move
- * the kernel to boot_params.pref_address, or as low as
- * possible.
- */
- start = hdr->pref_address;
- nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
-
- status = efi_call_phys4(sys_table->boottime->allocate_pages,
- EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
- nr_pages, &start);
- if (status != EFI_SUCCESS) {
- status = low_alloc(hdr->init_size, hdr->kernel_alignment,
- &start);
- if (status != EFI_SUCCESS)
- efi_printk("Failed to alloc mem for kernel\n");
- }
-
- if (status == EFI_SUCCESS)
- memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
- hdr->init_size);
-
- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = (__u32)start;
-
- return status;
-}
/*
* On success we return a pointer to a boot_params structure, and NULL
@@ -1163,14 +1120,15 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt structure\n");
goto fail;
}
gdt->size = 0x800;
- status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
+ status = efi_low_alloc(sys_table, gdt->size, 8,
+ (unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt\n");
goto fail;
}
@@ -1178,7 +1136,7 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
EFI_LOADER_DATA, sizeof(*idt),
(void **)&idt);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for idt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for idt structure\n");
goto fail;
}
@@ -1190,10 +1148,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
* address, relocate it.
*/
if (hdr->pref_address != hdr->code32_start) {
- status = relocate_kernel(hdr);
-
+ unsigned long bzimage_addr = hdr->code32_start;
+ status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ hdr->init_size, hdr->init_size,
+ hdr->pref_address,
+ hdr->kernel_alignment);
if (status != EFI_SUCCESS)
goto fail;
+
+ hdr->pref_address = hdr->code32_start;
+ hdr->code32_start = bzimage_addr;
}
status = exit_boot(boot_params, handle);
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e5b0a8f91c5f..cc78e2da7af9 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -10,8 +10,6 @@
#define SEG_GRANULARITY_4KB (1 << 0)
#define DESC_TYPE_CODE_DATA (1 << 0)
-
-#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
#define EFI_READ_CHUNK_SIZE (1024 * 1024)
#define EFI_CONSOLE_OUT_DEVICE_GUID \
@@ -40,6 +38,24 @@ struct efi_graphics_output_mode_info {
u32 pixels_per_scan_line;
} __packed;
+struct efi_graphics_output_protocol_mode_32 {
+ u32 max_mode;
+ u32 mode;
+ u32 info;
+ u32 size_of_info;
+ u64 frame_buffer_base;
+ u32 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_64 {
+ u32 max_mode;
+ u32 mode;
+ u64 info;
+ u64 size_of_info;
+ u64 frame_buffer_base;
+ u64 frame_buffer_size;
+} __packed;
+
struct efi_graphics_output_protocol_mode {
u32 max_mode;
u32 mode;
@@ -49,6 +65,20 @@ struct efi_graphics_output_protocol_mode {
unsigned long frame_buffer_size;
} __packed;
+struct efi_graphics_output_protocol_32 {
+ u32 query_mode;
+ u32 set_mode;
+ u32 blt;
+ u32 mode;
+};
+
+struct efi_graphics_output_protocol_64 {
+ u64 query_mode;
+ u64 set_mode;
+ u64 blt;
+ u64 mode;
+};
+
struct efi_graphics_output_protocol {
void *query_mode;
unsigned long set_mode;
@@ -56,16 +86,22 @@ struct efi_graphics_output_protocol {
struct efi_graphics_output_protocol_mode *mode;
};
+struct efi_uga_draw_protocol_32 {
+ u32 get_mode;
+ u32 set_mode;
+ u32 blt;
+};
+
+struct efi_uga_draw_protocol_64 {
+ u64 get_mode;
+ u64 set_mode;
+ u64 blt;
+};
+
struct efi_uga_draw_protocol {
void *get_mode;
void *set_mode;
void *blt;
};
-struct efi_simple_text_output_protocol {
- void *reset;
- void *output_string;
- void *test_string;
-};
-
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1e3184f6072f..abb988a54c69 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -50,6 +50,13 @@ ENTRY(efi_pe_entry)
pushl %eax
pushl %esi
pushl %ecx
+
+ call reloc
+reloc:
+ popl %ecx
+ subl reloc, %ecx
+ movl %ecx, BP_code32_start(%eax)
+
sub $0x4, %esp
ENTRY(efi_stub_entry)
@@ -63,12 +70,7 @@ ENTRY(efi_stub_entry)
hlt
jmp 1b
2:
- call 3f
-3:
- popl %eax
- subl $3b, %eax
- subl BP_pref_address(%esi), %eax
- add BP_code32_start(%esi), %eax
+ movl BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
jmp *%eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 16f24e6dad79..92059b8f3f7b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -217,6 +217,8 @@ ENTRY(efi_pe_entry)
cmpq $0,%rax
je 1f
mov %rax, %rdx
+ leaq startup_32(%rip), %rax
+ movl %eax, BP_code32_start(%rdx)
popq %rsi
popq %rdi
@@ -230,12 +232,7 @@ ENTRY(efi_stub_entry)
hlt
jmp 1b
2:
- call 3f
-3:
- popq %rax
- subq $3b, %rax
- subq BP_pref_address(%rsi), %rax
- add BP_code32_start(%esi), %eax
+ movl BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
jmp *%rax
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 586f41aac361..185fad49d86f 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -24,10 +24,6 @@
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
- .octa 0xc2000000000000000000000000000001
-.Ltwo_one:
- .octa 0x00000001000000000000000000000001
#define DATA %xmm0
#define SHASH %xmm1
@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update)
.Lupdate_just_ret:
ret
ENDPROC(clmul_ghash_update)
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
- movaps .Lbswap_mask, BSWAP
- movups (%rsi), %xmm0
- PSHUFB_XMM BSWAP %xmm0
- movaps %xmm0, %xmm1
- psllq $1, %xmm0
- psrlq $63, %xmm1
- movaps %xmm1, %xmm2
- pslldq $8, %xmm1
- psrldq $8, %xmm2
- por %xmm1, %xmm0
- # reduction
- pshufd $0b00100100, %xmm2, %xmm1
- pcmpeqd .Ltwo_one, %xmm1
- pand .Lpoly, %xmm1
- pxor %xmm1, %xmm0
- movups %xmm0, (%rdi)
- ret
-ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 6759dd1135be..d785cf2c529c 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const be128 *shash);
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *x = (be128 *)key;
+ u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
- clmul_ghash_setkey(&ctx->shash, key);
+ /* perform multiplication by 'x' in GF(2^128) */
+ a = be64_to_cpu(x->a);
+ b = be64_to_cpu(x->b);
+
+ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
+ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+
+ if (a >> 63)
+ ctx->shash.b ^= cpu_to_be64(0xc2);
return 0;
}
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6cbd8df348d2..9f5e71f06671 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -141,7 +141,7 @@ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
/* save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61;
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128 and append length */
index = sctx->count[0] & 0x7f;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a2..10adb41f162e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -459,7 +459,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+ compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 653668d140f9..4a8cb8d7cbd5 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -35,9 +35,9 @@ static void sanitize_boot_params(struct boot_params *boot_params)
*/
if (boot_params->sentinel) {
/* fields in boot_params are left uninitialized, clear them */
- memset(&boot_params->olpc_ofw_header, 0,
+ memset(&boot_params->ext_ramdisk_image, 0,
(char *)&boot_params->efi_info -
- (char *)&boot_params->olpc_ofw_header);
+ (char *)&boot_params->ext_ramdisk_image);
memset(&boot_params->kbd_status, 0,
(char *)&boot_params->hdr -
(char *)&boot_params->kbd_status);
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 46fc474fd819..f50de6951738 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -49,9 +49,15 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src,
int len, __wsum sum,
int *err_ptr)
{
+ __wsum ret;
+
might_sleep();
- return csum_partial_copy_generic((__force void *)src, dst,
- len, sum, err_ptr, NULL);
+ stac();
+ ret = csum_partial_copy_generic((__force void *)src, dst,
+ len, sum, err_ptr, NULL);
+ clac();
+
+ return ret;
}
/*
@@ -176,10 +182,16 @@ static inline __wsum csum_and_copy_to_user(const void *src,
int len, __wsum sum,
int *err_ptr)
{
+ __wsum ret;
+
might_sleep();
- if (access_ok(VERIFY_WRITE, dst, len))
- return csum_partial_copy_generic(src, (__force void *)dst,
- len, sum, NULL, err_ptr);
+ if (access_ok(VERIFY_WRITE, dst, len)) {
+ stac();
+ ret = csum_partial_copy_generic(src, (__force void *)dst,
+ len, sum, NULL, err_ptr);
+ clac();
+ return ret;
+ }
if (len)
*err_ptr = -EFAULT;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e99ac27f95b2..4af181dacf9e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -365,7 +365,7 @@ extern const char * const x86_power_flags[32];
static __always_inline __pure bool __static_cpu_has(u16 bit)
{
#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
- asm goto("1: jmp %l[t_no]\n"
+ asm_volatile_goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index cccd07fa5e3a..779c2efe2e97 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -29,7 +29,7 @@ extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
-extern void parse_e820_ext(struct setup_data *data);
+extern void parse_e820_ext(u64 phys_addr, u32 data_len);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f30c727..7a76dc284166 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,7 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern int add_efi_memmap;
-extern unsigned long x86_efi_facility;
+extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e25cc33ec54d..e72b2e41499e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -295,12 +295,13 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (tsk->thread.fpu.has_fpu));
+ if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) {
+ asm volatile(
+ "fnclex\n\t"
+ "emms\n\t"
+ "fildl %P[addr]" /* set F?P to defined value */
+ : : [addr] "m" (tsk->thread.fpu.has_fpu));
+ }
return fpu_restore_checking(&tsk->thread.fpu);
}
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index a8091216963b..68c05398bba9 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+ ptep_clear_flush(vma, addr, ptep);
}
static inline int huge_pte_none(pte_t pte)
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 3a16c1483b45..029766958e69 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -13,7 +13,7 @@
static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:"
+ asm_volatile_goto("1:"
STATIC_KEY_INITIAL_NOP
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..8b320722de7a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -92,7 +92,7 @@
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 80
#define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 8
+#define KVM_NR_VAR_MTRR 10
#define ASYNC_PF_PER_VCPU 64
@@ -445,7 +445,7 @@ struct kvm_vcpu_arch {
bool nmi_injected; /* Trying to inject an NMI this entry */
struct mtrr_state_type mtrr_state;
- u32 pat;
+ u64 pat;
int switch_db_regs;
unsigned long db[KVM_NR_DB_REGS];
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fa5f71e021d5..e6833c655e59 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -32,11 +32,20 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
-#define MCACOD 0xffff /* MCA Error Code */
+
+/*
+ * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
+ * bits 15:0. But bit 12 is the 'F' bit, defined for corrected
+ * errors to indicate that errors are being filtered by hardware.
+ * We should mask out bit 12 when looking for specific signatures
+ * of uncorrected errors - so the F bit is deliberately skipped
+ * in this #define.
+ */
+#define MCACOD 0xefff /* MCA Error Code */
/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */
#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
#define MCACOD_DATA 0x0134 /* Data Load */
#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index cdbf36776106..be12c534fd59 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -45,22 +45,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Re-load page tables */
load_cr3(next->pgd);
- /* stop flush ipis for the previous mm */
+ /* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
- /*
- * load the LDT, if the LDT is different:
- */
+ /* Load the LDT, if the LDT is different: */
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
- else {
+ else {
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
- if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
- /* We were in lazy tlb mode and leave_mm disabled
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * On established mms, the mm_cpumask is only changed
+ * from irq context, from ptep_clear_flush() while in
+ * lazy tlb mode, and here. Irqs are blocked during
+ * schedule, protecting us from simultaneous changes.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ /*
+ * We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e672234c4ff..5460bf923e16 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -415,9 +415,16 @@ static inline int pte_present(pte_t a)
}
#define pte_accessible pte_accessible
-static inline int pte_accessible(pte_t a)
+static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
- return pte_flags(a) & _PAGE_PRESENT;
+ if (pte_flags(a) & _PAGE_PRESENT)
+ return true;
+
+ if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+ mm_tlb_flush_pending(mm))
+ return true;
+
+ return false;
}
static inline int pte_hidden(pte_t pte)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 942a08623a1a..68e9f007cd4a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -232,6 +232,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
#define ARCH_HAS_USER_SINGLE_STEP_INFO
+/*
+ * When hitting ptrace_stop(), we cannot return using SYSRET because
+ * that does not restore the full CPU state, only a minimal set. The
+ * ptracer can change arbitrary register values, which is usually okay
+ * because the usual ptrace stops run off the signal delivery path which
+ * forces IRET; however, ptrace_event() stops happen in arbitrary places
+ * in the kernel and don't force IRET path.
+ *
+ * So force IRET path after a ptrace stop.
+ */
+#define arch_ptrace_stop_needed(code, info) \
+({ \
+ set_thread_flag(TIF_NOTIFY_RESUME); \
+ false; \
+})
+
struct user_desc;
extern int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692eaabab5..e3ddd7db723f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -233,8 +233,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
-/* The {read|write|spin}_lock() on x86 are full memory barriers. */
-static inline void smp_mb__after_lock(void) { }
-#define ARCH_HAS_SMP_MB_AFTER_LOCK
-
#endif /* _ASM_X86_SPINLOCK_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 095b21507b6a..60bd2748a7c9 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -119,9 +119,10 @@ static inline void setup_node_to_cpumask_map(void) { }
extern const struct cpumask *cpu_coregroup_mask(int cpu);
-#ifdef ENABLE_TOPO_DEFINES
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+
+#ifdef ENABLE_TOPO_DEFINES
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 6aef9fbc09b7..b913915e8e63 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
}
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
+static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
{
unsigned long pfn;
- int ret = 0;
+ int ret;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- if (unlikely(mfn >= machine_to_phys_nr)) {
- pfn = ~0;
- goto try_override;
- }
- pfn = 0;
+ if (unlikely(mfn >= machine_to_phys_nr))
+ return ~0;
+
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-try_override:
- /* ret might be < 0 if there are no entries in the m2p for mfn */
if (ret < 0)
- pfn = ~0;
- else if (get_phys_to_machine(pfn) != mfn)
+ return ~0;
+
+ return pfn;
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+ unsigned long pfn;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
+
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) != mfn) {
/*
* If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override
@@ -111,6 +119,7 @@ try_override:
* m2p_find_override_pfn returns ~0 if it doesn't find anything.
*/
pfn = m2p_find_override_pfn(mfn, ~0);
+ }
/*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7ea79c5fa1f2..492b29802f57 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = {
#define AVX_XOR_SPEED \
do { \
- if (cpu_has_avx) \
+ if (cpu_has_avx && cpu_has_osxsave) \
xor_speed(&xor_block_avx); \
} while (0)
#define AVX_SELECT(FASTEST) \
- (cpu_has_avx ? &xor_block_avx : FASTEST)
+ (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
#else
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa754..d3fd447ecbee 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -179,6 +179,7 @@
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index b44577bc9744..ec94e11807dc 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -48,9 +48,20 @@ int acpi_suspend_lowlevel(void)
#ifndef CONFIG_64BIT
native_store_gdt((struct desc_ptr *)&header->pmode_gdt);
+ /*
+ * We have to check that we can write back the value, and not
+ * just read it. At least on 90 nm Pentium M (Family 6, Model
+ * 13), reading an invalid MSR is not guaranteed to trap, see
+ * Erratum X4 in "Intel Pentium M Processor on 90 nm Process
+ * with 2-MB L2 Cache and Intel® Processor A100 and A110 on 90
+ * nm process with 512-KB L2 Cache Specification Update".
+ */
if (!rdmsr_safe(MSR_EFER,
&header->pmode_efer_low,
- &header->pmode_efer_high))
+ &header->pmode_efer_high) &&
+ !wrmsr_safe(MSR_EFER,
+ header->pmode_efer_low,
+ header->pmode_efer_high))
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
#endif /* !CONFIG_64BIT */
@@ -61,7 +72,10 @@ int acpi_suspend_lowlevel(void)
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
&header->pmode_misc_en_low,
- &header->pmode_misc_en_high))
+ &header->pmode_misc_en_high) &&
+ !wrmsr_safe(MSR_IA32_MISC_ENABLE,
+ header->pmode_misc_en_low,
+ header->pmode_misc_en_high))
header->pmode_behavior |=
(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
header->realmode_flags = acpi_realmode_flags;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3048ded1b598..59554dca96ec 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{}
};
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{}
};
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void)
next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link =
next_northbridge(link, amd_nb_link_ids);
- }
+ }
+ /* GART present only on Fam15h upto model 0fh */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- boot_cpu_data.x86 == 0x15)
+ (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
amd_northbridges.flags |= AMD_NB_GART;
/*
+ * Check for L3 cache presence.
+ */
+ if (!cpuid_edx(0x80000006))
+ return 0;
+
+ /*
* Some CPU families support L3 Cache Index Disable. There are some
* limitations because of E382 and E388 on family 0x10.
*/
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 794f6eb54cd3..b32dbb411a9a 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -98,7 +98,7 @@ static int __init early_get_pnodeid(void)
break;
case UV3_HUB_PART_NUMBER:
case UV3_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+ uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
break;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5013a48d1aff..ae177a014180 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -508,6 +508,16 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
#endif
+
+ /* F16h erratum 793, CVE-2013-6885 */
+ if (c->x86 == 0x16 && c->x86_model <= 0xf) {
+ u64 val;
+
+ rdmsrl(MSR_AMD64_LS_CFG, val);
+ if (!(val & BIT(15)))
+ wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15));
+ }
+
}
static const int amd_erratum_383[];
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f70a671..deeb48d9459b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -284,8 +284,13 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
raw_local_save_flags(eflags);
BUG_ON(eflags & X86_EFLAGS_AC);
- if (cpu_has(c, X86_FEATURE_SMAP))
+ if (cpu_has(c, X86_FEATURE_SMAP)) {
+#ifdef CONFIG_X86_SMAP
set_in_cr4(X86_CR4_SMAP);
+#else
+ clear_in_cr4(X86_CR4_SMAP);
+#endif
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9b0c441c03f5..f187806dfc18 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -387,7 +387,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+ if (c->x86 == 6 && cpu_has_clflush &&
+ (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
#ifdef CONFIG_X86_64
@@ -627,7 +628,7 @@ static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
tlb_flushall_shift = 5;
break;
case 0x63a: /* Ivybridge */
- tlb_flushall_shift = 1;
+ tlb_flushall_shift = 2;
break;
default:
tlb_flushall_shift = 6;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fa72a39e5d46..3982357de5b0 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
- unsigned int tmp, hi;
+ u32 mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int hi;
+ u64 tmp, mask;
/*
* get_mtrr doesn't need to update mtrr_state, also it could be called
@@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask: */
- tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
- mask_lo = size_or_mask | tmp;
+ tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask = size_or_mask | tmp;
/* Expand tmp with high bits to all 1s: */
- hi = fls(tmp);
+ hi = fls64(tmp);
if (hi > 0) {
- tmp |= ~((1<<(hi - 1)) - 1);
+ tmp |= ~((1ULL<<(hi - 1)) - 1);
- if (tmp != mask_lo) {
+ if (tmp != mask) {
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- mask_lo = tmp;
+ mask = tmp;
}
}
@@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *size = -mask;
+ *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
*type = base_lo & 0xff;
out_put_cpu:
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 726bf963c227..ca22b73aaa25 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -305,7 +305,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return -EINVAL;
}
- if (base & size_or_mask || size & size_or_mask) {
+ if ((base | (base + size - 1)) >>
+ (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -583,6 +584,7 @@ static struct syscore_ops mtrr_syscore_ops = {
int __initdata changed_by_mtrr_cleanup;
+#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -600,7 +602,7 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
phys_addr = 36;
@@ -619,7 +621,7 @@ void __init mtrr_bp_init(void)
boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
@@ -627,7 +629,7 @@ void __init mtrr_bp_init(void)
* VIA C* family have Intel style MTRRs,
* but don't support PAE
*/
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
phys_addr = 32;
}
@@ -637,21 +639,21 @@ void __init mtrr_bp_init(void)
if (cpu_has_k6_mtrr) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CENTAUR:
if (cpu_has_centaur_mcr) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
if (cpu_has_cyrix_arr) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..123d9e2271dc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1165,6 +1165,9 @@ static void x86_pmu_del(struct perf_event *event, int flags)
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
+ if (i >= cpuc->n_events - cpuc->n_added)
+ --cpuc->n_added;
+
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);
@@ -1249,10 +1252,20 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ int ret;
+ u64 start_clock;
+ u64 finish_clock;
+
if (!atomic_read(&active_events))
return NMI_DONE;
- return x86_pmu.handle_irq(regs);
+ start_clock = local_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
}
struct event_constraint emptyconstraint;
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 5f0581e713c2..b46601ada813 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
+#include <linux/syscore_ops.h>
#include <asm/apic.h>
@@ -816,6 +817,18 @@ out:
return ret;
}
+static void ibs_eilvt_setup(void)
+{
+ /*
+ * Force LVT offset assignment for family 10h: The offsets are
+ * not assigned by the BIOS for this family, so the OS is
+ * responsible for doing it. If the OS assignment fails, fall
+ * back to BIOS settings and try to setup this.
+ */
+ if (boot_cpu_data.x86 == 0x10)
+ force_ibs_eilvt_setup();
+}
+
static inline int get_ibs_lvt_offset(void)
{
u64 val;
@@ -851,6 +864,36 @@ static void clear_APIC_ibs(void *dummy)
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
+#ifdef CONFIG_PM
+
+static int perf_ibs_suspend(void)
+{
+ clear_APIC_ibs(NULL);
+ return 0;
+}
+
+static void perf_ibs_resume(void)
+{
+ ibs_eilvt_setup();
+ setup_APIC_ibs(NULL);
+}
+
+static struct syscore_ops perf_ibs_syscore_ops = {
+ .resume = perf_ibs_resume,
+ .suspend = perf_ibs_suspend,
+};
+
+static void perf_ibs_pm_init(void)
+{
+ register_syscore_ops(&perf_ibs_syscore_ops);
+}
+
+#else
+
+static inline void perf_ibs_pm_init(void) { }
+
+#endif
+
static int __cpuinit
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
@@ -877,18 +920,12 @@ static __init int amd_ibs_init(void)
if (!caps)
return -ENODEV; /* ibs not supported by the cpu */
- /*
- * Force LVT offset assignment for family 10h: The offsets are
- * not assigned by the BIOS for this family, so the OS is
- * responsible for doing it. If the OS assignment fails, fall
- * back to BIOS settings and try to setup this.
- */
- if (boot_cpu_data.x86 == 0x10)
- force_ibs_eilvt_setup();
+ ibs_eilvt_setup();
if (!ibs_eilvt_valid())
goto out;
+ perf_ibs_pm_init();
get_online_cpus();
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af538..8aac56bda7dc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -314,8 +314,8 @@ static struct uncore_event_desc snbep_uncore_imc_events[] = {
static struct uncore_event_desc snbep_uncore_qpi_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
- INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"),
+ INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
{ /* end: all zeroes */ },
};
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abeabbda5..174da5fc5a7b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -658,15 +658,18 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata)
+void __init parse_e820_ext(u64 phys_addr, u32 data_len)
{
int entries;
struct e820entry *extmap;
+ struct setup_data *sdata;
+ sdata = early_memremap(phys_addr, data_len);
entries = sdata->len / sizeof(struct e820entry);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ early_iounmap(sdata, data_len);
printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 94ab6b90dd3f..4f7c82cdd0f5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -196,16 +196,21 @@ static void __init ati_bugs_contd(int num, int slot, int func)
static void __init intel_remapping_check(int num, int slot, int func)
{
u8 revision;
+ u16 device;
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
/*
- * Revision 0x13 of this chipset supports irq remapping
- * but has an erratum that breaks its behavior, flag it as such
+ * Revision <= 13 of all triggering devices id in this quirk
+ * have a problem draining interrupts when irq remapping is
+ * enabled, and should be flagged as broken. Additionally
+ * revision 0x22 of device id 0x3405 has this problem.
*/
- if (revision == 0x13)
+ if (revision <= 0x13)
+ set_irq_remapping_broken();
+ else if (device == 0x3405 && revision == 0x22)
set_irq_remapping_broken();
-
}
#define QFLAG_APPLY_ONCE 0x1
@@ -239,6 +244,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
{ PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+ { PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST,
+ PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2dec1df3..ac6328176097 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -434,9 +434,10 @@ sysenter_past_esp:
jnz sysenter_audit
sysenter_do_call:
cmpl $(NR_syscalls), %eax
- jae syscall_badsys
+ jae sysenter_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
+sysenter_after_call:
LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
@@ -554,11 +555,6 @@ ENTRY(iret_exc)
CFI_RESTORE_STATE
ldt_ss:
- larl PT_OLDSS(%esp), %eax
- jnz restore_nocheck
- testl $0x00400000, %eax # returning to 32bit stack?
- jnz restore_nocheck # allright, normal return
-
#ifdef CONFIG_PARAVIRT
/*
* The kernel can't run on a non-flat stack if paravirt mode
@@ -691,7 +687,12 @@ END(syscall_fault)
syscall_badsys:
movl $-ENOSYS,PT_EAX(%esp)
- jmp resume_userspace
+ jmp syscall_exit
+END(syscall_badsys)
+
+sysenter_badsys:
+ movl $-ENOSYS,PT_EAX(%esp)
+ jmp sysenter_after_call
END(syscall_badsys)
CFI_ENDPROC
/*
@@ -1075,7 +1076,7 @@ ENTRY(ftrace_caller)
pushl $0 /* Pass NULL as regs pointer */
movl 4*4(%esp), %eax
movl 0x4(%ebp), %edx
- leal function_trace_op, %ecx
+ movl function_trace_op, %ecx
subl $MCOUNT_INSN_SIZE, %eax
.globl ftrace_call
@@ -1133,7 +1134,7 @@ ENTRY(ftrace_regs_caller)
movl 12*4(%esp), %eax /* Load ip (1st parameter) */
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
- leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+ movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
pushl %esp /* Save pt_regs as 4th parameter */
GLOBAL(ftrace_regs_call)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 727208941030..7ac938a4bfab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -88,7 +88,7 @@ END(function_hook)
MCOUNT_SAVE_FRAME \skip
/* Load the ftrace_ops into the 3rd parameter */
- leaq function_trace_op, %rdx
+ movq function_trace_op(%rip), %rdx
/* Load ip into the first parameter */
movq RIP(%rsp), %rdi
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 42a392a9fd02..1ffc32dbe450 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -77,8 +77,7 @@ within(unsigned long addr, unsigned long start, unsigned long end)
return addr >= start && addr < end;
}
-static int
-do_ftrace_mod_code(unsigned long ip, const void *new_code)
+static unsigned long text_ip_addr(unsigned long ip)
{
/*
* On x86_64, kernel text mappings are mapped read-only with
@@ -91,7 +90,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa_symbol(ip));
- return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
+ return ip;
}
static const unsigned char *ftrace_nop_replace(void)
@@ -123,8 +122,10 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
return -EINVAL;
+ ip = text_ip_addr(ip);
+
/* replace the text with the new text */
- if (do_ftrace_mod_code(ip, new_code))
+ if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
return -EPERM;
sync_core();
@@ -221,33 +222,56 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return -EINVAL;
}
-int ftrace_update_ftrace_func(ftrace_func_t func)
+static unsigned long ftrace_update_func;
+
+static int update_ftrace_func(unsigned long ip, void *new)
{
- unsigned long ip = (unsigned long)(&ftrace_call);
- unsigned char old[MCOUNT_INSN_SIZE], *new;
+ unsigned char old[MCOUNT_INSN_SIZE];
int ret;
- memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
- new = ftrace_call_replace(ip, (unsigned long)func);
+ memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
+
+ ftrace_update_func = ip;
+ /* Make sure the breakpoints see the ftrace_update_func update */
+ smp_wmb();
/* See comment above by declaration of modifying_ftrace_code */
atomic_inc(&modifying_ftrace_code);
ret = ftrace_modify_code(ip, old, new);
+ atomic_dec(&modifying_ftrace_code);
+
+ return ret;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char *new;
+ int ret;
+
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = update_ftrace_func(ip, new);
+
/* Also update the regs callback function */
if (!ret) {
ip = (unsigned long)(&ftrace_regs_call);
- memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
- ret = ftrace_modify_code(ip, old, new);
+ ret = update_ftrace_func(ip, new);
}
- atomic_dec(&modifying_ftrace_code);
-
return ret;
}
+static int is_ftrace_caller(unsigned long ip)
+{
+ if (ip == ftrace_update_func)
+ return 1;
+
+ return 0;
+}
+
/*
* A breakpoint was added to the code address we are about to
* modify, and this is the handle that will just skip over it.
@@ -257,10 +281,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
*/
int ftrace_int3_handler(struct pt_regs *regs)
{
+ unsigned long ip;
+
if (WARN_ON_ONCE(!regs))
return 0;
- if (!ftrace_location(regs->ip - 1))
+ ip = regs->ip - 1;
+ if (!ftrace_location(ip) && !is_ftrace_caller(ip))
return 0;
regs->ip += MCOUNT_INSN_SIZE - 1;
@@ -632,8 +659,8 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
ret = -EPERM;
goto out;
}
- run_sync();
out:
+ run_sync();
return ret;
fail_update:
@@ -665,45 +692,41 @@ int __init ftrace_dyn_arch_init(void *data)
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
-static int ftrace_mod_jmp(unsigned long ip,
- int old_offset, int new_offset)
+static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
- unsigned char code[MCOUNT_INSN_SIZE];
+ static union ftrace_code_union calc;
- if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
+ /* Jmp not a call (ignore the .e8) */
+ calc.e8 = 0xe9;
+ calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
- if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
- return -EINVAL;
+ /*
+ * ftrace external locks synchronize the access to the static variable.
+ */
+ return calc.code;
+}
- *(int *)(&code[1]) = new_offset;
+static int ftrace_mod_jmp(unsigned long ip, void *func)
+{
+ unsigned char *new;
- if (do_ftrace_mod_code(ip, &code))
- return -EPERM;
+ new = ftrace_jmp_replace(ip, (unsigned long)func);
- return 0;
+ return update_ftrace_func(ip, new);
}
int ftrace_enable_ftrace_graph_caller(void)
{
unsigned long ip = (unsigned long)(&ftrace_graph_call);
- int old_offset, new_offset;
- old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
- new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
-
- return ftrace_mod_jmp(ip, old_offset, new_offset);
+ return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}
int ftrace_disable_ftrace_graph_caller(void)
{
unsigned long ip = (unsigned long)(&ftrace_graph_call);
- int old_offset, new_offset;
-
- old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
- new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
- return ftrace_mod_jmp(ip, old_offset, new_offset);
+ return ftrace_mod_jmp(ip, &ftrace_stub);
}
#endif /* !CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 73afd11799ca..df63cae573e0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -566,6 +566,10 @@ ENDPROC(early_idt_handlers)
/* This is global to keep gas from relaxing the jumps */
ENTRY(early_idt_handler)
cld
+
+ cmpl $2,(%esp) # X86_TRAP_NMI
+ je is_nmi # Ignore NMI
+
cmpl $2,%ss:early_recursion_flag
je hlt_loop
incl %ss:early_recursion_flag
@@ -616,8 +620,9 @@ ex_entry:
pop %edx
pop %ecx
pop %eax
- addl $8,%esp /* drop vector number and error code */
decl %ss:early_recursion_flag
+is_nmi:
+ addl $8,%esp /* drop vector number and error code */
iret
ENDPROC(early_idt_handler)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65ebaffe..f2a9a2aa98f3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -343,6 +343,9 @@ early_idt_handlers:
ENTRY(early_idt_handler)
cld
+ cmpl $2,(%rsp) # X86_TRAP_NMI
+ je is_nmi # Ignore NMI
+
cmpl $2,early_recursion_flag(%rip)
jz 1f
incl early_recursion_flag(%rip)
@@ -405,8 +408,9 @@ ENTRY(early_idt_handler)
popq %rdx
popq %rcx
popq %rax
- addq $16,%rsp # drop vector number and error code
decl early_recursion_flag(%rip)
+is_nmi:
+ addq $16,%rsp # drop vector number and error code
INTERRUPT_RETURN
ENDPROC(early_idt_handler)
@@ -513,7 +517,7 @@ ENTRY(phys_base)
#include "../../x86/xen/xen-head.S"
.section .bss, "aw", @nobits
- .align L1_CACHE_BYTES
+ .align PAGE_SIZE
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index cb339097b9ea..b03ff1842547 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin);
void __kernel_fpu_end(void)
{
- if (use_eager_fpu())
- math_state_restore();
- else
+ if (use_eager_fpu()) {
+ /*
+ * For eager fpu, most the time, tsk_used_math() is true.
+ * Restore the user math as we are done with the kernel usage.
+ * At few instances during thread exit, signal handling etc,
+ * tsk_used_math() is false. Those few places will take proper
+ * actions, so we don't need to restore the math here.
+ */
+ if (likely(tsk_used_math(current)))
+ math_state_restore();
+ } else {
stts();
+ }
}
EXPORT_SYMBOL(__kernel_fpu_end);
@@ -116,7 +125,7 @@ static void __cpuinit mxcsr_feature_mask_init(void)
if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (fx_scratch));
+ asm volatile("fxsave %0" : "+m" (fx_scratch));
mask = fx_scratch.mxcsr_mask;
if (mask == 0)
mask = 0x0000ffbf;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ebc987398923..dcbbaa165bde 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,6 +20,8 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+int sysctl_ldt16 = 0;
+
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
{
@@ -229,6 +231,17 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
}
}
+ /*
+ * On x86-64 we do not support 16-bit segments due to
+ * IRET leaking the high bits of the kernel stack address.
+ */
+#ifdef CONFIG_X86_64
+ if (!ldt_info.seg_32bit && !sysctl_ldt16) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+#endif
+
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index efdec7cd8e01..b516dfb411ec 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -430,7 +430,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
if (request_firmware(&fw, (const char *)fw_name, device)) {
- pr_err("failed to load file %s\n", fw_name);
+ pr_debug("failed to load file %s\n", fw_name);
goto out;
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 872079a67e4d..f7d0672481fd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,8 +100,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
flag |= __GFP_ZERO;
again:
page = NULL;
- if (!(flag & GFP_ATOMIC))
+ /* CMA can be used only in the context which permits sleeping */
+ if (flag & __GFP_WAIT)
page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ /* fallback */
if (!page)
page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
if (!page)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81a5f5e8f142..59b90379cb6a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
* The switch back from broadcast mode needs to be
* called with interrupts disabled.
*/
- local_irq_disable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
- local_irq_enable();
+ local_irq_disable();
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+ local_irq_enable();
} else
default_idle();
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 04ee1e2e4c02..52dbf1e400dc 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev)
return;
pci_read_config_dword(nb_ht, 0x60, &val);
- node = val & 7;
+ node = pcibus_to_node(dev->bus) | (val & 7);
/*
* Some hardware may return an invalid node ID,
* so check it first:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 76fa1e9a2b39..90fd1195f276 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -447,6 +447,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
+ { /* Some C6100 machines were shipped with vendor being 'Dell'. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56f7fcfe7fa2..a3627ade4b15 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -426,25 +426,23 @@ static void __init reserve_initrd(void)
static void __init parse_setup_data(void)
{
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- u32 data_len, map_len;
+ u32 data_len, map_len, data_type;
map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
(u64)sizeof(struct setup_data));
data = early_memremap(pa_data, map_len);
data_len = data->len + sizeof(struct setup_data);
- if (data_len > map_len) {
- early_iounmap(data, map_len);
- data = early_memremap(pa_data, data_len);
- map_len = data_len;
- }
+ data_type = data->type;
+ pa_next = data->next;
+ early_iounmap(data, map_len);
- switch (data->type) {
+ switch (data_type) {
case SETUP_E820_EXT:
- parse_e820_ext(data);
+ parse_e820_ext(pa_data, data_len);
break;
case SETUP_DTB:
add_dtb(pa_data);
@@ -452,8 +450,7 @@ static void __init parse_setup_data(void)
default:
break;
}
- pa_data = data->next;
- early_iounmap(data, map_len);
+ pa_data = pa_next;
}
}
@@ -911,11 +908,11 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
- set_bit(EFI_64BIT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
}
if (efi_enabled(EFI_BOOT))
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e457..087ab2af381a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -364,7 +364,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -429,7 +429,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -496,7 +496,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+ compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5aedb81..30277e27431a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
*begin = new_begin;
}
} else {
- *begin = TASK_UNMAPPED_BASE;
+ *begin = current->mm->mmap_legacy_base;
*end = TASK_SIZE;
}
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5953dcea752d..5484d54582ca 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4207,7 +4207,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
case OpMem8:
ctxt->memop.bytes = 1;
if (ctxt->memop.type == OP_REG) {
- ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+ int highbyte_regs = ctxt->rex_prefix == 0;
+
+ ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm,
+ highbyte_regs);
fetch_register_operand(&ctxt->memop);
}
goto mem_common;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
#include "irq.h"
#include "i8254.h"
+#include "x86.h"
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
atomic_set(&ps->pending, 0);
ps->irq_ack = 1;
+ /*
+ * Do not allow the guest to program periodic timers with small
+ * interval, since the hrtimers are not throttled by the host
+ * scheduler.
+ */
+ if (ps->is_periodic) {
+ s64 min_period = min_timer_period_us * 1000LL;
+
+ if (ps->period < min_period) {
+ pr_info_ratelimited(
+ "kvm: requested %lld ns "
+ "i8254 timer period limited to %lld ns\n",
+ ps->period, min_period);
+ ps->period = min_period;
+ }
+ }
+
hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0eee2c8b64d1..279d093524b4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
-static unsigned int min_timer_period_us = 500;
-module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
-
static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
*((u32 *) (apic->regs + reg_off)) = val;
@@ -153,6 +150,8 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
+#define KVM_X2APIC_CID_BITS 0
+
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -190,7 +189,8 @@ static void recalculate_apic_map(struct kvm *kvm)
if (apic_x2apic_mode(apic)) {
new->ldr_bits = 32;
new->cid_shift = 16;
- new->cid_mask = new->lid_mask = 0xffff;
+ new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
+ new->lid_mask = 0xffff;
} else if (kvm_apic_sw_enabled(apic) &&
!new->cid_mask /* flat mode */ &&
kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
@@ -370,6 +370,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
+ /* Note that we never get here with APIC virtualization enabled. */
+
if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -381,12 +383,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
apic->highest_isr_cache = vec;
}
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+ int result;
+
+ /*
+ * Note that isr_count is always 1, and highest_isr_cache
+ * is always -1, with APIC virtualization enabled.
+ */
+ if (!apic->isr_count)
+ return -1;
+ if (likely(apic->highest_isr_cache != -1))
+ return apic->highest_isr_cache;
+
+ result = find_highest_vector(apic->regs + APIC_ISR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
- if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+ struct kvm_vcpu *vcpu;
+ if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+ return;
+
+ vcpu = apic->vcpu;
+
+ /*
+ * We do get here for APIC virtualization enabled if the guest
+ * uses the Hyper-V APIC enlightenment. In this case we may need
+ * to trigger a new interrupt delivery by writing the SVI field;
+ * on the other hand isr_count and highest_isr_cache are unused
+ * and must be left alone.
+ */
+ if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+ kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+ apic_find_highest_isr(apic));
+ else {
--apic->isr_count;
- BUG_ON(apic->isr_count < 0);
- apic->highest_isr_cache = -1;
+ BUG_ON(apic->isr_count < 0);
+ apic->highest_isr_cache = -1;
+ }
}
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
@@ -466,22 +504,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
-{
- int result;
-
- /* Note that isr_count is always 1 with vid enabled */
- if (!apic->isr_count)
- return -1;
- if (likely(apic->highest_isr_cache != -1))
- return apic->highest_isr_cache;
-
- result = find_highest_vector(apic->regs + APIC_ISR);
- ASSERT(result == -1 || result >= 16);
-
- return result;
-}
-
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -855,7 +877,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
ASSERT(apic != NULL);
/* if initial count is 0, current count should also be 0 */
- if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
+ if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 ||
+ apic->lapic_timer.period == 0)
return 0;
remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
@@ -1360,8 +1383,12 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
return;
}
+ if (!kvm_vcpu_is_bsp(apic->vcpu))
+ value &= ~MSR_IA32_APICBASE_BSP;
+ vcpu->arch.apic_base = value;
+
/* update jump label if enable bit changes */
- if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
+ if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
if (value & MSR_IA32_APICBASE_ENABLE)
static_key_slow_dec_deferred(&apic_hw_disabled);
else
@@ -1369,10 +1396,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
recalculate_apic_map(vcpu->kvm);
}
- if (!kvm_vcpu_is_bsp(apic->vcpu))
- value &= ~MSR_IA32_APICBASE_BSP;
-
- vcpu->arch.apic_base = value;
if ((old_value ^ value) & X2APIC_ENABLE) {
if (value & X2APIC_ENABLE) {
u32 id = kvm_apic_id(apic);
@@ -1618,6 +1641,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
int vector = kvm_apic_has_interrupt(vcpu);
struct kvm_lapic *apic = vcpu->arch.apic;
+ /* Note that we never get here with APIC virtualization enabled. */
+
if (vector == -1)
return -1;
@@ -1705,7 +1730,6 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
u32 data;
- void *vapic;
if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
@@ -1713,9 +1737,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
- data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
- kunmap_atomic(vapic);
+ kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+ sizeof(u32));
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
@@ -1751,7 +1774,6 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
u32 data, tpr;
int max_irr, max_isr;
struct kvm_lapic *apic = vcpu->arch.apic;
- void *vapic;
apic_sync_pv_eoi_to_guest(vcpu, apic);
@@ -1767,18 +1789,24 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
- *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
- kunmap_atomic(vapic);
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+ sizeof(u32));
}
-void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
+int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
- vcpu->arch.apic->vapic_addr = vapic_addr;
- if (vapic_addr)
+ if (vapic_addr) {
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.apic->vapic_cache,
+ vapic_addr, sizeof(u32)))
+ return -EINVAL;
__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
- else
+ } else {
__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
+ }
+
+ vcpu->arch.apic->vapic_addr = vapic_addr;
+ return 0;
}
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index c730ac9fe801..c8b0d0d2da5c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -34,7 +34,7 @@ struct kvm_lapic {
*/
void *regs;
gpa_t vapic_addr;
- struct page *vapic_page;
+ struct gfn_to_hva_cache vapic_cache;
unsigned long pending_events;
unsigned int sipi_vector;
};
@@ -76,7 +76,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
-void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
+int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 004cc87b781c..711c649f80b7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2585,6 +2585,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int emulate = 0;
gfn_t pseudo_gfn;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return 0;
+
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2748,6 +2751,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
bool ret = false;
u64 spte = 0ull;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return false;
+
if (!page_fault_can_be_fast(vcpu, error_code))
return false;
@@ -3139,6 +3145,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
struct kvm_shadow_walk_iterator iterator;
u64 spte = 0ull;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return spte;
+
walk_shadow_page_lockless_begin(vcpu);
for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
if (!is_shadow_present_pte(spte))
@@ -4329,6 +4338,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
u64 spte;
int nr_sptes = 0;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return nr_sptes;
+
walk_shadow_page_lockless_begin(vcpu);
for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
sptes[iterator.level-1] = spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860b457a..7e6090e13237 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -423,6 +423,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ goto out_gpte_changed;
+
for (shadow_walk_init(&it, vcpu, addr);
shadow_walk_okay(&it) && it.level > gw->level;
shadow_walk_next(&it)) {
@@ -671,6 +674,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
*/
mmu_topup_memory_caches(vcpu);
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+ WARN_ON(1);
+ return;
+ }
+
spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) {
level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a14a6eaf871d..765210d4d925 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2985,10 +2985,8 @@ static int cr8_write_interception(struct vcpu_svm *svm)
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
r = cr_interception(svm);
- if (irqchip_in_kernel(svm->vcpu.kvm)) {
- clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ if (irqchip_in_kernel(svm->vcpu.kvm))
return r;
- }
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return r;
kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@ -3550,6 +3548,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
return;
+ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+
if (irr == -1)
return;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a91939555..7cdafb6dc705 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3399,15 +3399,22 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = vmx_read_guest_seg_limit(vmx, seg);
var->selector = vmx_read_guest_seg_selector(vmx, seg);
ar = vmx_read_guest_seg_ar(vmx, seg);
+ var->unusable = (ar >> 16) & 1;
var->type = ar & 15;
var->s = (ar >> 4) & 1;
var->dpl = (ar >> 5) & 3;
- var->present = (ar >> 7) & 1;
+ /*
+ * Some userspaces do not preserve unusable property. Since usable
+ * segment has to be present according to VMX spec we can use present
+ * property to amend userspace bug by making unusable segment always
+ * nonpresent. vmx_segment_access_rights() already marks nonpresent
+ * segment as unusable.
+ */
+ var->present = !var->unusable;
var->avl = (ar >> 12) & 1;
var->l = (ar >> 13) & 1;
var->db = (ar >> 14) & 1;
var->g = (ar >> 15) & 1;
- var->unusable = (ar >> 16) & 1;
}
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
@@ -7126,8 +7133,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
free_vpid(vmx);
- free_nested(vmx);
free_loaded_vmcs(vmx->loaded_vmcs);
+ free_nested(vmx);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c34180..1be0a9e75d1f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
@@ -3138,8 +3141,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&va, argp, sizeof va))
goto out;
- r = 0;
- kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+ r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
break;
}
case KVM_X86_SETUP_MCE: {
@@ -5539,36 +5541,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
!kvm_event_needs_reinjection(vcpu);
}
-static int vapic_enter(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
- struct page *page;
-
- if (!apic || !apic->vapic_addr)
- return 0;
-
- page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
- if (is_error_page(page))
- return -EFAULT;
-
- vcpu->arch.apic->vapic_page = page;
- return 0;
-}
-
-static void vapic_exit(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
- int idx;
-
- if (!apic || !apic->vapic_addr)
- return;
-
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- kvm_release_page_dirty(apic->vapic_page);
- mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
-}
-
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
{
int max_irr, tpr;
@@ -5889,11 +5861,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- r = vapic_enter(vcpu);
- if (r) {
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- return r;
- }
r = 1;
while (r > 0) {
@@ -5951,8 +5918,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- vapic_exit(vcpu);
-
return r;
}
@@ -6017,7 +5982,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
frag->len -= len;
}
- if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+ if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
vcpu->mmio_needed = 0;
if (vcpu->mmio_is_write)
return 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..3186542f2fa3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -124,5 +124,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
extern u64 host_xcr0;
+extern unsigned int min_timer_period_us;
+
extern struct static_key kvm_no_apic_vcpu;
#endif
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 25b7ae8d058a..7609e0e421ec 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -6,6 +6,7 @@
*/
#include <asm/checksum.h>
#include <linux/module.h>
+#include <asm/smap.h>
/**
* csum_partial_copy_from_user - Copy and checksum from user space.
@@ -52,8 +53,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
len -= 2;
}
}
+ stac();
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
+ clac();
if (unlikely(*errp))
goto out_err;
@@ -82,6 +85,8 @@ __wsum
csum_partial_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
{
+ __wsum ret;
+
might_sleep();
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
@@ -105,8 +110,11 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
}
*errp = 0;
- return csum_partial_copy_generic(src, (void __force *)dst,
- len, isum, NULL, errp);
+ stac();
+ ret = csum_partial_copy_generic(src, (void __force *)dst,
+ len, isum, NULL, errp);
+ clac();
+ return ret;
}
EXPORT_SYMBOL(csum_partial_copy_to_user);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 654be4ae3047..c1e9e4cbbd76 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -989,6 +989,12 @@ static int fault_in_kernel_space(unsigned long address)
static inline bool smap_violation(int error_code, struct pt_regs *regs)
{
+ if (!IS_ENABLED(CONFIG_X86_SMAP))
+ return false;
+
+ if (!static_cpu_has(X86_FEATURE_SMAP))
+ return false;
+
if (error_code & PF_USER)
return false;
@@ -1091,11 +1097,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
- if (static_cpu_has(X86_FEATURE_SMAP)) {
- if (unlikely(smap_violation(error_code, regs))) {
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
+ if (unlikely(smap_violation(error_code, regs))) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1f34e9219775..7a5bf1b76e2f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -78,8 +78,8 @@ __ref void *alloc_low_pages(unsigned int num)
return __va(pfn << PAGE_SHIFT);
}
-/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE)
+/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */
+#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void __init early_alloc_pgt_buf(void)
{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9a1e6583910c..86c758de4b34 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -50,6 +50,21 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
return err;
}
+static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
+ void *arg)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; ++i)
+ if (pfn_valid(start_pfn + i) &&
+ !PageReserved(pfn_to_page(start_pfn + i)))
+ return 1;
+
+ WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn);
+
+ return 0;
+}
+
/*
* Remap an arbitrary physical address space into the kernel virtual
* address space. Needed when the kernel wants to access high addresses
@@ -93,14 +108,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
+ pfn = phys_addr >> PAGE_SHIFT;
last_pfn = last_addr >> PAGE_SHIFT;
- for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) {
- int is_ram = page_is_ram(pfn);
-
- if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
- return NULL;
- WARN_ON_ONCE(is_ram);
- }
+ if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
+ __ioremap_check_ram) == 1)
+ return NULL;
/*
* Mappings have to be page-aligned
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df6835f9f..5c1ae28825cd 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -112,12 +112,14 @@ static unsigned long mmap_legacy_base(void)
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
{
+ mm->mmap_legacy_base = mmap_legacy_base();
+ mm->mmap_base = mmap_base();
+
if (mmap_is_legacy()) {
- mm->mmap_base = mmap_legacy_base();
+ mm->mmap_base = mm->mmap_legacy_base;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
- mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 877b9a1b2152..01495755701b 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -140,7 +140,7 @@ bpf_slow_path_byte_msh:
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
- mov $SIZE,%ecx; /* size */ \
+ mov $SIZE,%edx; /* size */ \
call bpf_internal_load_pointer_neg_helper; \
test %rax,%rax; \
pop SKBDATA; \
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f66b54086ce5..0c966fecfb8c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -324,15 +324,21 @@ void bpf_jit_compile(struct sk_filter *fp)
EMIT2(0x89, 0xd0); /* mov %edx,%eax */
break;
case BPF_S_ALU_MOD_K: /* A %= K; */
+ if (K == 1) {
+ CLEAR_A();
+ break;
+ }
EMIT2(0x31, 0xd2); /* xor %edx,%edx */
EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
EMIT2(0xf7, 0xf1); /* div %ecx */
EMIT2(0x89, 0xd0); /* mov %edx,%eax */
break;
- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
- EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
- EMIT(K, 4);
- EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
+ case BPF_S_ALU_DIV_K: /* A /= K */
+ if (K == 1)
+ break;
+ EMIT2(0x31, 0xd2); /* xor %edx,%edx */
+ EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
+ EMIT2(0xf7, 0xf1); /* div %ecx */
break;
case BPF_S_ALU_AND_X:
seen |= SEEN_XREG;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d2fbcedcf6ea..816e940b3998 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -60,34 +60,19 @@
static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
-struct efi __read_mostly efi = {
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
-};
-EXPORT_SYMBOL(efi);
-
struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
-unsigned long x86_efi_facility;
+static __initdata efi_config_table_type_t arch_tables[] = {
+#ifdef CONFIG_X86_UV
+ {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+#endif
+ {NULL_GUID, NULL, 0},
+};
-/*
- * Returns 1 if 'facility' is enabled, 0 otherwise.
- */
-int efi_enabled(int facility)
-{
- return test_bit(facility, &x86_efi_facility) != 0;
-}
-EXPORT_SYMBOL(efi_enabled);
+u64 efi_setup; /* efi setup_data physical address */
static bool __initdata disable_runtime = false;
static int __init setup_noefi(char *arg)
@@ -397,6 +382,8 @@ int __init efi_memblock_x86_reserve_range(void)
memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ efi.memmap = &memmap;
+
return 0;
}
@@ -438,7 +425,7 @@ void __init efi_reserve_boot_services(void)
* - Not within any part of the kernel
* - Not the bios reserved area
*/
- if ((start+size >= __pa_symbol(_text)
+ if ((start + size > __pa_symbol(_text)
&& start <= __pa_symbol(_end)) ||
!e820_all_mapped(start, start+size, E820_RAM) ||
memblock_is_region_reserved(start, size)) {
@@ -454,7 +441,7 @@ void __init efi_reserve_boot_services(void)
void __init efi_unmap_memmap(void)
{
- clear_bit(EFI_MEMMAP, &x86_efi_facility);
+ clear_bit(EFI_MEMMAP, &efi.flags);
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
@@ -576,80 +563,6 @@ static int __init efi_systab_init(void *phys)
return 0;
}
-static int __init efi_config_init(u64 tables, int nr_tables)
-{
- void *config_tables, *tablep;
- int i, sz;
-
- if (efi_enabled(EFI_64BIT))
- sz = sizeof(efi_config_table_64_t);
- else
- sz = sizeof(efi_config_table_32_t);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = early_ioremap(tables, nr_tables * sz);
- if (config_tables == NULL) {
- pr_err("Could not map Configuration table!\n");
- return -ENOMEM;
- }
-
- tablep = config_tables;
- pr_info("");
- for (i = 0; i < efi.systab->nr_tables; i++) {
- efi_guid_t guid;
- unsigned long table;
-
- if (efi_enabled(EFI_64BIT)) {
- u64 table64;
- guid = ((efi_config_table_64_t *)tablep)->guid;
- table64 = ((efi_config_table_64_t *)tablep)->table;
- table = table64;
-#ifdef CONFIG_X86_32
- if (table64 >> 32) {
- pr_cont("\n");
- pr_err("Table located above 4GB, disabling EFI.\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sz);
- return -EINVAL;
- }
-#endif
- } else {
- guid = ((efi_config_table_32_t *)tablep)->guid;
- table = ((efi_config_table_32_t *)tablep)->table;
- }
- if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
- efi.mps = table;
- pr_cont(" MPS=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
- efi.acpi20 = table;
- pr_cont(" ACPI 2.0=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
- efi.acpi = table;
- pr_cont(" ACPI=0x%lx ", table);
- } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
- efi.smbios = table;
- pr_cont(" SMBIOS=0x%lx ", table);
-#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = table;
- pr_cont(" UVsystab=0x%lx ", table);
-#endif
- } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
- efi.hcdp = table;
- pr_cont(" HCDP=0x%lx ", table);
- } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
- efi.uga = table;
- pr_cont(" UGA=0x%lx ", table);
- }
- tablep += sz;
- }
- pr_cont("\n");
- early_iounmap(config_tables, efi.systab->nr_tables * sz);
- return 0;
-}
-
static int __init efi_runtime_init(void)
{
efi_runtime_services_t *runtime;
@@ -725,7 +638,11 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
- set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+ efi.config_table = (unsigned long)efi.systab->tables;
+ efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
+ efi.runtime = (unsigned long)efi.systab->runtime;
/*
* Show what we know for posterity
@@ -743,10 +660,10 @@ void __init efi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
+ if (efi_config_init(arch_tables))
return;
- set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
+ set_bit(EFI_CONFIG_TABLES, &efi.flags);
/*
* Note: We currently don't support runtime services on an EFI
@@ -758,20 +675,13 @@ void __init efi_init(void)
else {
if (disable_runtime || efi_runtime_init())
return;
- set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
}
if (efi_memmap_init())
return;
- set_bit(EFI_MEMMAP, &x86_efi_facility);
-
-#ifdef CONFIG_X86_32
- if (efi_is_native()) {
- x86_platform.get_wallclock = efi_get_time;
- x86_platform.set_wallclock = efi_set_rtc_mmss;
- }
-#endif
+ set_bit(EFI_MEMMAP, &efi.flags);
#if EFI_DEBUG
print_efi_memmap();
@@ -814,34 +724,6 @@ static void __init runtime_code_page_mkexec(void)
}
}
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM. So, look it up in the existing EFI memory map instead. Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
- void *p;
- if (WARN_ON(!memmap.map))
- return NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
- u64 end = md->phys_addr + size;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- if (!md->virt_addr)
- continue;
- if (phys_addr >= md->phys_addr && phys_addr < end) {
- phys_addr += md->virt_addr - md->phys_addr;
- return (__force void __iomem *)(unsigned long)phys_addr;
- }
- }
- return NULL;
-}
-
void efi_memory_uc(u64 addr, unsigned long size)
{
unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
@@ -910,10 +792,13 @@ void __init efi_enter_virtual_mode(void)
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+ continue;
+ }
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 88692871823f..9cac82588cbc 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -73,9 +73,10 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/../../boot/code16gcc.h \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+ -mno-mmx -mno-sse \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
- $(call cc-option, -fno-unit-at-a-time)) \
+ $(call cc-option, -fno-unit-at-a-time)) \
$(call cc-option, -fno-stack-protector) \
$(call cc-option, -mpreferred-stack-boundary=2)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65dfd14f..63a899304d27 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -212,10 +212,10 @@
203 common sched_setaffinity sys_sched_setaffinity
204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 common io_setup sys_io_setup
+206 64 io_setup sys_io_setup
207 common io_destroy sys_io_destroy
208 common io_getevents sys_io_getevents
-209 common io_submit sys_io_submit
+209 64 io_submit sys_io_submit
210 common io_cancel sys_io_cancel
211 64 get_thread_area
212 common lookup_dcookie sys_lookup_dcookie
@@ -356,3 +356,5 @@
540 x32 process_vm_writev compat_sys_process_vm_writev
541 x32 setsockopt compat_sys_setsockopt
542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad646f5fd..0f134c7cfc24 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -41,6 +41,7 @@ enum {
#ifdef CONFIG_X86_64
#define vdso_enabled sysctl_vsyscall32
#define arch_setup_additional_pages syscall32_setup_pages
+extern int sysctl_ldt16;
#endif
/*
@@ -380,6 +381,13 @@ static ctl_table abi_table2[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ldt16",
+ .data = &sysctl_ldt16,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{}
};
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa5927e..156344448d19 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -878,7 +878,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
@@ -925,8 +924,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
* frontend pages while they are being shared with the backend,
* because mfn_to_pfn (that ends up being called by GUPF) will
* return the backend pfn rather than the frontend pfn. */
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == mfn)
set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
return 0;
@@ -941,7 +940,6 @@ int m2p_remove_override(struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
mfn = get_phys_to_machine(pfn);
@@ -1019,8 +1017,8 @@ int m2p_remove_override(struct page *page,
* the original pfn causes mfn_to_pfn(mfn) to return the frontend
* pfn again. */
mfn &= ~FOREIGN_FRAME_BIT;
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
m2p_find_override(mfn) == NULL)
set_phys_to_machine(pfn, mfn);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 94eac5c85cdc..0a9fb7a0b452 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -313,6 +313,17 @@ static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
e820_add_region(start, end - start, type);
}
+void xen_ignore_unusable(struct e820entry *list, size_t map_size)
+{
+ struct e820entry *entry;
+ unsigned int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ if (entry->type == E820_UNUSABLE)
+ entry->type = E820_RAM;
+ }
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
@@ -353,6 +364,17 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ /*
+ * Xen won't allow a 1:1 mapping to be created to UNUSABLE
+ * regions, so if we're using the machine memory map leave the
+ * region as RAM as it is in the pseudo-physical map.
+ *
+ * UNUSABLE regions in domUs are not handled and will need
+ * a patch in the future.
+ */
+ if (xen_initial_domain())
+ xen_ignore_unusable(map, memmap.nr_entries);
+
/* Make sure the Xen-supplied memory map is well-ordered. */
sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d99cae8147d1..570c9a5c4d3f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -245,6 +245,15 @@ static void __init xen_smp_prepare_boot_cpu(void)
old memory can be recycled */
make_lowmem_page_readwrite(xen_initial_gdt);
+#ifdef CONFIG_X86_32
+ /*
+ * Xen starts us with XEN_FLAT_RING1_DS, but linux code
+ * expects __USER_DS
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
+
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
@@ -667,8 +676,15 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc;
- rc = native_cpu_up(cpu, tidle);
- WARN_ON (xen_smp_intr_init(cpu));
+ /*
+ * xen_smp_intr_init() needs to run before native_cpu_up()
+ * so that IPI vectors are set up on the booting CPU before
+ * it is marked online in native_cpu_up().
+ */
+ rc = xen_smp_intr_init(cpu);
+ WARN_ON(rc);
+ if (!rc)
+ rc = native_cpu_up(cpu, tidle);
return rc;
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3d88bfdf9e1c..13e8935e2eab 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -36,9 +36,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-/* unused ns of stolen and blocked time */
+/* unused ns of stolen time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
-static DEFINE_PER_CPU(u64, xen_residual_blocked);
/* return an consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
@@ -115,7 +114,7 @@ static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
struct vcpu_runstate_info *snap;
- s64 blocked, runnable, offline, stolen;
+ s64 runnable, offline, stolen;
cputime_t ticks;
get_runstate_snapshot(&state);
@@ -125,7 +124,6 @@ static void do_stolen_accounting(void)
snap = &__get_cpu_var(xen_runstate_snapshot);
/* work out how much time the VCPU has not been runn*ing* */
- blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
@@ -141,17 +139,6 @@ static void do_stolen_accounting(void)
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
__this_cpu_write(xen_residual_stolen, stolen);
account_steal_ticks(ticks);
-
- /* Add the appropriate number of ticks of blocked time,
- including any left-overs from last time. */
- blocked += __this_cpu_read(xen_residual_blocked);
-
- if (blocked < 0)
- blocked = 0;
-
- ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
- __this_cpu_write(xen_residual_blocked, blocked);
- account_idle_ticks(ticks);
}
/* Get the TSC speed from Xen */