diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/boot/compressed/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/boot/compressed/head_32.S | 5 | ||||
-rw-r--r-- | arch/x86/boot/compressed/head_64.S | 5 | ||||
-rw-r--r-- | arch/x86/boot/compressed/misc.c | 13 | ||||
-rw-r--r-- | arch/x86/boot/compressed/mkpiggy.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/amd_early.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 21 | ||||
-rw-r--r-- | arch/x86/tools/calc_run_size.pl | 30 |
11 files changed, 97 insertions, 39 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0fcd9133790c..14fe7cba21d1 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -75,8 +75,10 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 +RUN_SIZE = $(shell objdump -h vmlinux | \ + perl $(srctree)/arch/x86/tools/calc_run_size.pl) quiet_cmd_mkpiggy = MKPIGGY $@ - cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) + cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) targets += piggy.S $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f45ab7a36fb6..c5b56ed10aff 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -186,7 +186,8 @@ relocated: * Do the decompression, and jump to the new kernel.. */ /* push arguments for decompress_kernel: */ - pushl $z_output_len /* decompressed length */ + pushl $z_run_size /* size of kernel with .bss and .brk */ + pushl $z_output_len /* decompressed length, end of relocs */ leal z_extract_offset_negative(%ebx), %ebp pushl %ebp /* output address */ pushl $z_input_len /* input_len */ @@ -196,7 +197,7 @@ relocated: pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call decompress_kernel /* returns kernel location in %eax */ - addl $24, %esp + addl $28, %esp /* * Jump to the decompressed kernel. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b10fa66a2540..34bbc0911b7c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -334,13 +334,16 @@ relocated: * Do the decompression, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ + movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ + pushq %r9 movq %rsi, %rdi /* real mode address */ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ - movq $z_output_len, %r9 /* decompressed length */ + movq $z_output_len, %r9 /* decompressed length, end of relocs */ call decompress_kernel /* returns kernel location in %rax */ + popq %r9 popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 196eaf373a06..eb25ca1eb6da 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -393,7 +393,8 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, - unsigned long output_len) + unsigned long output_len, + unsigned long run_size) { real_mode = rmode; @@ -416,8 +417,14 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; - output = choose_kernel_location(input_data, input_len, - output, output_len); + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + */ + output = choose_kernel_location(input_data, input_len, output, + output_len > run_size ? output_len + : run_size); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index b669ab65bf6c..d8222f213182 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -36,11 +36,13 @@ int main(int argc, char *argv[]) uint32_t olen; long ilen; unsigned long offs; + unsigned long run_size; FILE *f = NULL; int retval = 1; - if (argc < 2) { - fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); + if (argc < 3) { + fprintf(stderr, "Usage: %s compressed_file run_size\n", + argv[0]); goto bail; } @@ -74,6 +76,7 @@ int main(int argc, char *argv[]) offs += olen >> 12; /* Add 8 bytes for each 32K block */ offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ + run_size = atoi(argv[2]); printf(".section \".rodata..compressed\",\"a\",@progbits\n"); printf(".globl z_input_len\n"); @@ -85,6 +88,8 @@ int main(int argc, char *argv[]) /* z_extract_offset_negative allows simplification of head_32.S */ printf(".globl z_extract_offset_negative\n"); printf("z_extract_offset_negative = -0x%lx\n", offs); + printf(".globl z_run_size\n"); + printf("z_run_size = %lu\n", run_size); printf(".globl input_data, input_data_end\n"); printf("input_data:\n"); diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 617a9e284245..b63773ba1646 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size) * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. */ -static void apply_ucode_in_initrd(void *ucode, size_t size) +static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) { struct equiv_cpu_entry *eq; size_t *cont_sz; u32 *header; u8 *data, **cont; + u8 (*patch)[PATCH_MAX_SIZE]; u16 eq_id = 0; int offset, left; u32 rev, eax, ebx, ecx, edx; @@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); cont_sz = (size_t *)__pa_nodebug(&container_size); cont = (u8 **)__pa_nodebug(&container); + patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); #else new_rev = &ucode_new_rev; cont_sz = &container_size; cont = &container; + patch = &amd_ucode_patch; #endif data = ucode; @@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) rev = mc->hdr.patch_id; *new_rev = rev; - /* save ucode patch */ - memcpy(amd_ucode_patch, mc, - min_t(u32, header[1], PATCH_MAX_SIZE)); + if (save_patch) + memcpy(patch, mc, + min_t(u32, header[1], PATCH_MAX_SIZE)); } } @@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void) *data = cp.data; *size = cp.size; - apply_ucode_in_initrd(cp.data, cp.size); + apply_ucode_in_initrd(cp.data, cp.size, true); } #ifdef CONFIG_X86_32 @@ -263,7 +266,7 @@ void load_ucode_amd_ap(void) size_t *usize; void **ucode; - mc = (struct microcode_amd *)__pa(amd_ucode_patch); + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { __apply_microcode_amd(mc); return; @@ -275,7 +278,7 @@ void load_ucode_amd_ap(void) if (!*ucode || !*usize) return; - apply_ucode_in_initrd(*ucode, *usize); + apply_ucode_in_initrd(*ucode, *usize, false); } static void __init collect_cpu_sig_on_bsp(void *arg) @@ -339,7 +342,7 @@ void load_ucode_amd_ap(void) * AP has a different equivalence ID than BSP, looks like * mixed-steppings silicon so go through the ucode blob anew. */ - apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); + apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false); } } #endif @@ -347,7 +350,9 @@ void load_ucode_amd_ap(void) int __init save_microcode_in_initrd_amd(void) { unsigned long cont; + int retval = 0; enum ucode_state ret; + u8 *cont_va; u32 eax; if (!container) @@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void) #ifdef CONFIG_X86_32 get_bsp_sig(); - cont = (unsigned long)container; + cont = (unsigned long)container; + cont_va = __va(container); #else /* * We need the physical address of the container for both bitness since * boot_params.hdr.ramdisk_image is a physical address. */ - cont = __pa(container); + cont = __pa(container); + cont_va = container; #endif /* @@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void) if (relocated_ramdisk) container = (u8 *)(__va(relocated_ramdisk) + (cont - boot_params.hdr.ramdisk_image)); + else + container = cont_va; if (ucode_new_rev) pr_info("microcode: updated early to new patch_level=0x%08x\n", @@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void) ret = load_microcode_amd(eax, container, container_size); if (ret != UCODE_OK) - return -EINVAL; + retval = -EINVAL; /* * This will be freed any msec now, stash patches for the current @@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void) container = NULL; container_size = 0; - return 0; + return retval; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1340ebfcb467..5ee8064bd1d2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2475,6 +2475,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7461f50d5bb1..0686fe313b3b 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,15 +1441,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 -#endif - /* * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. @@ -1487,7 +1478,7 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) + if (is_ia32_task()) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3fe310c05994..023b5ac94807 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4911,7 +4911,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c96314abd144..0004ac72dbdd 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -399,13 +399,20 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - int young; - - young = ptep_test_and_clear_young(vma, address, ptep); - if (young) - flush_tlb_page(vma, address); - - return young; + /* + * On x86 CPUs, clearing the accessed bit without a TLB flush + * doesn't cause data corruption. [ It could cause incorrect + * page aging and the (mistaken) reclaim of hot pages, but the + * chance of that should be relatively low. ] + * + * So as a performance optimization don't flush the TLB when + * clearing the accessed bit, it will eventually be flushed by + * a context switch or a VM operation anyway. [ In the rare + * event of it not getting flushed for a long time the delay + * shouldn't really matter because there's no real memory + * pressure for swapout to react to. ] + */ + return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl new file mode 100644 index 000000000000..0b0b124d3ece --- /dev/null +++ b/arch/x86/tools/calc_run_size.pl @@ -0,0 +1,30 @@ +#!/usr/bin/perl +# +# Calculate the amount of space needed to run the kernel, including room for +# the .bss and .brk sections. +# +# Usage: +# objdump -h a.out | perl calc_run_size.pl +use strict; + +my $mem_size = 0; +my $file_offset = 0; + +my $sections=" *[0-9]+ \.(?:bss|brk) +"; +while (<>) { + if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) { + my $size = hex($1); + my $offset = hex($2); + $mem_size += $size; + if ($file_offset == 0) { + $file_offset = $offset; + } elsif ($file_offset != $offset) { + die ".bss and .brk lack common file offset\n"; + } + } +} + +if ($file_offset == 0) { + die "Never found .bss or .brk file offset\n"; +} +printf("%d\n", $mem_size + $file_offset); |