Diffstat (limited to 'arch/arm64/kernel')
36 files changed, 3467 insertions, 330 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index b9b87fa61bac..ac389d32ccde 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,21 +4,34 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ + -I$(src)/../../../scripts/dtc/libfdt + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o +arm64-obj-$(CONFIG_SMP) += topology.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 338b568cd8ae..7f0512feaa13 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -56,3 +56,7 @@ EXPORT_SYMBOL(clear_bit); EXPORT_SYMBOL(test_and_clear_bit); EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 825d76c21d84..9a9fce090d58 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -25,6 +25,8 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> @@ -150,5 +152,14 @@ int main(void) DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); + DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); + DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); + DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif return 0; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index fea84694fce4..b33051d501e6 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -306,9 +306,6 @@ static int brk_handler(unsigned long addr, unsigned int esr, if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; - pr_warn("unexpected brk exception at %lx, esr=0x%x\n", - (long)instruction_pointer(regs), esr); - if 
(!user_mode(regs)) return -EFAULT; diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 000000000000..66716c9b9e5f --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,109 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + + __INIT + + /* + * We arrive here from the EFI boot manager with: + * + * * CPU in little-endian mode + * * MMU on with identity-mapped RAM + * * Icache and Dcache on + * + * We will most likely be running from some place other than where + * we want to be. The kernel image wants to be placed at TEXT_OFFSET + * from start of RAM. + */ +ENTRY(efi_stub_entry) + /* + * Create a stack frame to save FP/LR with extra space + * for image_addr variable passed to efi_entry(). + */ + stp x29, x30, [sp, #-32]! + + /* + * Call efi_entry to do the real work. + * x0 and x1 are already set up by firmware. Current runtime + * address of image is calculated and passed via *image_addr. + * + * unsigned long efi_entry(void *handle, + * efi_system_table_t *sys_table, + * unsigned long *image_addr) ; + */ + adrp x8, _text + add x8, x8, #:lo12:_text + add x2, sp, 16 + str x8, [x2] + bl efi_entry + cmn x0, #1 + b.eq efi_load_fail + + /* + * efi_entry() will have relocated the kernel image if necessary + * and we return here with device tree address in x0 and the kernel + * entry point stored at *image_addr. Save those values in registers + * which are callee preserved. + */ + mov x20, x0 // DTB address + ldr x0, [sp, #16] // relocated _text address + mov x21, x0 + + /* + * Flush dcache covering current runtime addresses + * of kernel text/data. Then flush all of icache. + */ + adrp x1, _text + add x1, x1, #:lo12:_text + adrp x2, _edata + add x2, x2, #:lo12:_edata + sub x1, x2, x1 + + bl __flush_dcache_area + ic ialluis + + /* Turn off Dcache and MMU */ + mrs x0, CurrentEL + cmp x0, #PSR_MODE_EL2t + ccmp x0, #PSR_MODE_EL2h, #0x4, ne + b.ne 1f + mrs x0, sctlr_el2 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el2, x0 + isb + b 2f +1: + mrs x0, sctlr_el1 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb +2: + /* Jump to kernel entry point */ + mov x0, x20 + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x21 + +efi_load_fail: + mov x0, #EFI_LOAD_ERROR + ldp x29, x30, [sp], #32 + ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 000000000000..e786e6cdc400 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include <linux/efi.h> +#include <linux/libfdt.h> +#include <asm/sections.h> + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, + efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, + void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + status = efi_relocate_kernel(sys_table, image_addr, + kernel_size, kernel_memsize, + dram_base + TEXT_OFFSET, + PAGE_SIZE); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + return status; + } + if (*image_addr != (dram_base + TEXT_OFFSET)) { + pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); + efi_free(sys_table, kernel_memsize, *image_addr); + return EFI_ERROR; + } + *image_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 000000000000..14db1f6e8d7f --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,469 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ + uefi_debug = 1; + + return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ + if (md->attribute & EFI_MEMORY_WB) + return 1; + return 0; +} + +static void __init efi_setup_idmap(void) +{ + struct memblock_region *r; + efi_memory_desc_t *md; + u64 paddr, npages, size; + + for_each_memblock(memory, r) + create_id_mapping(r->base, r->size, 0); + + /* map runtime io spaces */ + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + continue; + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + create_id_mapping(paddr, size, 1); + } +} + +static int __init uefi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i, retval; + + efi.systab = early_memremap(efi_system_table, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) { + pr_warn("Unable to map EFI system table.\n"); + return -ENOMEM; + } + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) < 2) + pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = early_memremap(efi.systab->fw_vendor, + sizeof(vendor)); + if (c16) { + for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + retval = efi_config_init(NULL); + if (retval == 0) + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + early_memunmap(c16, sizeof(vendor)); + early_memunmap(efi.systab, sizeof(efi_system_table_t)); + + return retval; +} + +static __initdata char memory_type_name[][32] = { + {"Reserved"}, + {"Loader Code"}, + {"Loader Data"}, + {"Boot Code"}, + {"Boot Data"}, + {"Runtime Code"}, + {"Runtime Data"}, + {"Conventional Memory"}, + {"Unusable Memory"}, + {"ACPI Reclaim Memory"}, + {"ACPI Memory NVS"}, + {"Memory Mapped I/O"}, + {"MMIO Port Space"}, + {"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. 
+ */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ + if (!is_normal_ram(md)) + return 0; + + if (md->attribute & EFI_MEMORY_RUNTIME) + return 1; + + if (md->type == EFI_ACPI_RECLAIM_MEMORY || + md->type == EFI_RESERVED_TYPE) + return 1; + + return 0; +} + +static __init void reserve_regions(void) +{ + efi_memory_desc_t *md; + u64 paddr, npages, size; + + if (uefi_debug) + pr_info("Processing EFI memory map:\n"); + + for_each_efi_memory_desc(&memmap, md) { + paddr = md->phys_addr; + npages = md->num_pages; + + if (uefi_debug) + pr_info(" 0x%012llx-0x%012llx [%s]", + paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, + memory_type_name[md->type]); + + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + early_init_dt_add_memory_arch(paddr, size); + + if (is_reserve_region(md) || + md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) { + memblock_reserve(paddr, size); + if (uefi_debug) + pr_cont("*"); + } + + if (uefi_debug) + pr_cont("\n"); + } +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ + u64 size = end - start; + + if (uefi_debug) + pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); + + free_bootmem_late(start, size); + return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ + u64 map_start, map_end, total = 0; + + if (end <= start) + return total; + + map_start = (u64)memmap.phys_map; + map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); + map_start &= PAGE_MASK; + + if (start < map_end && end > map_start) { + /* region overlaps UEFI memmap */ + if (start < map_start) + total += free_one_region(start, map_start); + + if (map_end < end) + total += free_one_region(map_end, end); + } else + total += free_one_region(start, end); + + return total; +} + +static void __init free_boot_services(void) +{ + u64 total_freed = 0; + u64 keep_end, free_start, free_end; + efi_memory_desc_t *md; + + /* + * If kernel uses larger pages than UEFI, we have to be careful + * not to inadvertantly free memory we want to keep if there is + * overlap at the kernel page size alignment. We do not want to + * free is_reserve_region() memory nor the UEFI memmap itself. + * + * The memory map is sorted, so we keep track of the end of + * any previous region we want to keep, remember any region + * we want to free and defer freeing it until we encounter + * the next region we want to keep. This way, before freeing + * it, we can clip it as needed to avoid freeing memory we + * want to keep for UEFI. + */ + + keep_end = 0; + free_start = 0; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + + if (is_reserve_region(md)) { + /* + * We don't want to free any memory from this region. + */ + if (free_start) { + /* adjust free_end then free region */ + if (free_end > md->phys_addr) + free_end -= PAGE_SIZE; + total_freed += free_region(free_start, free_end); + free_start = 0; + } + keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + continue; + } + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + /* no need to free this region */ + continue; + } + + /* + * We want to free memory from this region. 
+ */ + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (free_start) { + if (paddr <= free_end) + free_end = paddr + size; + else { + total_freed += free_region(free_start, free_end); + free_start = paddr; + free_end = paddr + size; + } + } else { + free_start = paddr; + free_end = paddr + size; + } + if (free_start < keep_end) { + free_start += PAGE_SIZE; + if (free_start >= free_end) + free_start = 0; + } + } + if (free_start) + total_freed += free_region(free_start, free_end); + + if (total_freed) + pr_info("Freed 0x%llx bytes of EFI boot services memory", + total_freed); +} + +void __init efi_init(void) +{ + struct efi_fdt_params params; + + /* Grab UEFI information placed in FDT by stub */ + if (!efi_get_fdt_params(¶ms, uefi_debug)) + return; + + efi_system_table = params.system_table; + + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); + memmap.phys_map = (void *)params.mmap; + memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map_end = memmap.map + params.mmap_size; + memmap.desc_size = params.desc_size; + memmap.desc_version = params.desc_ver; + + if (uefi_init() < 0) + return; + + reserve_regions(); +} + +void __init efi_idmap_init(void) +{ + if (!efi_enabled(EFI_BOOT)) + return; + + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ + efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ + u64 paddr, vaddr, npages, size; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + vaddr = (__force u64)ioremap_cache(paddr, size); + else + vaddr = (__force u64)ioremap(paddr, size); + + if (!vaddr) { + pr_err("Unable to remap 0x%llx pages @ %p\n", + npages, (void *)paddr); + return 0; + } + + /* adjust for any rounding when EFI and system pagesize differs */ + md->virt_addr = vaddr + (md->phys_addr - paddr); + + if (uefi_debug) + pr_info(" EFI remap 0x%012llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + memcpy(*new, md, memmap.desc_size); + *new += memmap.desc_size; + + return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + phys_addr_t virtmap_phys; + void *virtmap, *virt_md; + efi_status_t status; + u64 mapsize; + int count = 0; + unsigned long flags; + + if (!efi_enabled(EFI_BOOT)) { + pr_info("EFI services will not be available.\n"); + return -1; + } + + pr_info("Remapping and enabling EFI services.\n"); + + /* replace early memmap mapping with permanent mapping */ + mapsize = memmap.map_end - memmap.map; + early_memunmap(memmap.map, mapsize); + memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + mapsize); + memmap.map_end = memmap.map + mapsize; + + efi.memmap = &memmap; + + /* Map the runtime regions */ + virtmap = kmalloc(mapsize, GFP_KERNEL); + if (!virtmap) { + pr_err("Failed to allocate EFI virtual memmap\n"); + return -1; + } + virtmap_phys = virt_to_phys(virtmap); + virt_md = virtmap; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (remap_region(md, &virt_md)) + ++count; + } + + efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + if (efi.systab) + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + local_irq_save(flags); + cpu_switch_mm(idmap_pg_dir, 
&init_mm); + + /* Call SetVirtualAddressMap with the physical address of the map */ + runtime = efi.systab->runtime; + efi.set_virtual_address_map = runtime->set_virtual_address_map; + + status = efi.set_virtual_address_map(count * memmap.desc_size, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)virtmap_phys); + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + local_irq_restore(flags); + + kfree(virtmap); + + free_boot_services(); + + if (status != EFI_SUCCESS) { + pr_err("Failed to set EFI virtual address map! [%lx]\n", + status); + return -1; + } + + /* Set up runtime services function pointers */ + runtime = efi.systab->runtime; + efi.get_time = runtime->get_time; + efi.set_time = runtime->set_time; + efi.get_wakeup_time = runtime->get_wakeup_time; + efi.set_wakeup_time = runtime->set_wakeup_time; + efi.get_variable = runtime->get_variable; + efi.get_next_variable = runtime->get_next_variable; + efi.set_variable = runtime->set_variable; + efi.query_variable_info = runtime->query_variable_info; + efi.update_capsule = runtime->update_capsule; + efi.query_capsule_caps = runtime->query_capsule_caps; + efi.get_next_high_mono_count = runtime->get_next_high_mono_count; + efi.reset_system = runtime->reset_system; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 000000000000..b051871f2965 --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + * mov x0, x30 + * bl _mcount + * [function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp => 0:+-----+ + * in _mcount() | x29 | -> instrumented function's fp + * +-----+ + * | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp => +16:+-----+ + * when instrumented | | + * function calls | ... | + * _mcount() | | + * | | + * instrumented => +xx:+-----+ + * function's fp | x29 | -> parent's fp + * +-----+ + * | x30 | -> instrumented function's lr (= parent's pc) + * +-----+ + * | ... | + */ + + .macro mcount_enter + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + .endm + + .macro mcount_exit + ldp x29, x30, [sp], #16 + ret + .endm + + .macro mcount_adjust_addr rd, rn + sub \rd, \rn, #AARCH64_INSN_SIZE + .endm + + /* for instrumented function's parent */ + .macro mcount_get_parent_fp reg + ldr \reg, [x29] + ldr \reg, [\reg] + .endm + + /* for instrumented function */ + .macro mcount_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + + .macro mcount_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr reg + ldr \reg, [x29] + ldr \reg, [\reg, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr_addr reg + ldr \reg, [x29] + add \reg, \reg, #8 + .endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ldr x0, =ftrace_trace_stop + ldr x0, [x0] // if ftrace_trace_stop + ret // return; +#endif + mcount_enter + + ldr x0, =ftrace_trace_function + ldr x2, [x0] + adr x0, ftrace_stub + cmp x0, x2 // if (ftrace_trace_function + b.eq skip_ftrace_call // != ftrace_stub) { + + mcount_get_pc x0 // function's pc + mcount_get_lr x1 // function's lr (= parent's pc) + blr x2 // (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call: // return; + mcount_exit // } +#else + mcount_exit // return; + // } +skip_ftrace_call: + ldr x1, =ftrace_graph_return + ldr x2, [x1] // if ((ftrace_graph_return + cmp x0, x2 // != ftrace_stub) + b.ne ftrace_graph_caller + + ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry + ldr x2, [x1] // != ftrace_graph_entry_stub)) + ldr x0, =ftrace_graph_entry_stub + cmp x0, x2 + b.ne ftrace_graph_caller // ftrace_graph_caller(); + + mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) + ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) + mcount_enter + + mcount_get_pc0 x0 // function's pc + mcount_get_lr x1 // function's lr + + .global ftrace_call +ftrace_call: // tracer(pc, lr); + nop // This will be replaced with "bl xxx" + // where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. 
+ * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) + mcount_get_lr_addr x0 // pointer to function's saved lr + mcount_get_pc x1 // function's pc + mcount_get_parent_fp x2 // parent's fp + bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp) + + mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) + str x0, [sp, #-16]! + mov x0, x29 // parent's fp + bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); + mov x30, x0 // restore the original return address + ldr x0, [sp], #16 + ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 0aca56ac8a32..56ef569b2b62 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -275,7 +275,6 @@ el1_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - mov x1, x25 mov x2, sp b do_sp_pc_abort el1_undef: @@ -646,8 +645,9 @@ el0_svc_naked: // compat entry point enable_irq get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys @@ -663,9 +663,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -680,9 +679,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fa308e4a1fa..522df9c7f3a4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> @@ -85,6 +86,66 @@ void fpsimd_flush_thread(void) preempt_enable(); } +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin(void) +{ + /* Avoid using the NEON in interrupt context */ + BUG_ON(in_interrupt()); + preempt_disable(); + + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); +} +EXPORT_SYMBOL(kernel_neon_begin); + +void kernel_neon_end(void) +{ + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + + preempt_enable(); +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + /* * FP/SIMD support code initialisation. */ @@ -103,6 +164,8 @@ static int __init fpsimd_init(void) else elf_hwcap |= HWCAP_ASIMD; + fpsimd_pm_init(); + return 0; } late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 000000000000..649890a3ac4e --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,177 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). 
+ */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. 
+ */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 39a8a83f1883..bbc9fe1658fa 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,6 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> @@ -96,8 +97,18 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved +#endif .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved @@ -108,7 +119,109 @@ .byte 0x52 .byte 0x4d .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. 
+ */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT @@ -148,12 +261,22 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - b.eq 1f + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -162,6 +285,23 @@ ENTRY(el2_setup) msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 @@ -170,7 +310,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. 
*/ @@ -207,11 +348,7 @@ ENTRY(set_cpu_boot_mode_flag) cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: dc cvac, x1 // Clean potentially dirty cache line - dsb sy - str w20, [x1] // This CPU has booted in EL1 - dc civac, x1 // Clean&invalidate potentially stale cache line - dsb sy +1: str w20, [x1] // This CPU has booted in EL1 ret ENDPROC(set_cpu_boot_mode_flag) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 5ab825c59db9..6de3460ede4c 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,14 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -169,15 +170,68 @@ static enum debug_el debug_exception_level(int privilege) } } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { + HW_BREAKPOINT_INSTALL, + HW_BREAKPOINT_UNINSTALL, + HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + * operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + * slot index on success + * -ENOSPC if no slot is available/matches + * -EINVAL on wrong operations parameter */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, + struct perf_event *bp, + enum hw_breakpoint_ops ops) +{ + int i; + struct perf_event **slot; + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + if (!*slot) { + *slot = bp; + return i; + } + break; + case HW_BREAKPOINT_UNINSTALL: + if (*slot == bp) { + *slot = NULL; + return i; + } + break; + case HW_BREAKPOINT_RESTORE: + if (*slot == bp) + return i; + break; + default: + pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); + return -EINVAL; + } + } + return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, + enum hw_breakpoint_ops ops) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; + struct perf_event **slots; struct debug_info *debug_info = ¤t->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; + enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { @@ -196,67 +250,54 @@ int arch_install_hw_breakpoint(struct perf_event *bp) reg_enable = !debug_info->wps_disabled; } - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return -ENOSPC; + i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); - /* Ensure debug monitors are enabled at the correct exception level. */ - enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) + return i; - /* Setup the address register. */ - write_wb_reg(val_reg, i, info->address); + switch (ops) { + case HW_BREAKPOINT_INSTALL: + /* + * Ensure debug monitors are enabled at the correct exception + * level. 
+ */ + enable_debug_monitors(dbg_el); + /* Fall through */ + case HW_BREAKPOINT_RESTORE: + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, + reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + break; + case HW_BREAKPOINT_UNINSTALL: + /* Reset the control register. */ + write_wb_reg(ctrl_reg, i, 0); - /* Setup the control register. */ - ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + /* + * Release the debug monitors for the correct exception + * level. + */ + disable_debug_monitors(dbg_el); + break; + } return 0; } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; - int i, max_slots, base; - - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - /* Breakpoint */ - base = AARCH64_DBG_REG_BCR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps; - } else { - /* Watchpoint */ - base = AARCH64_DBG_REG_WCR; - slots = __get_cpu_var(wp_on_reg); - max_slots = core_num_wrps; - } - - /* Remove the breakpoint. */ - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return; - - /* Reset the control register. */ - write_wb_reg(base, i, 0); + return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} - /* Release the debug monitors for the correct exception level. */ - disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); } static int get_hbp_len(u8 hbp_len) @@ -806,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused) { int i; - - for (i = 0; i < core_num_brps; ++i) { - write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + struct perf_event **slots; + /* + * When a CPU goes through cold-boot, it does not have any installed + * slot, so it is safe to share the same function for restoring and + * resetting breakpoints; when a CPU is hotplugged in, it goes + * through the slots, which are all empty, hence it just resets control + * and value for debug registers. + * When this function is triggered on warm-boot through a CPU PM + * notifier some slots might be initialized; if so they are + * reprogrammed according to the debug slots content. 
+ */ + for (slots = __get_cpu_var(bp_on_reg), i = 0; i < core_num_brps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } } - for (i = 0; i < core_num_wrps; ++i) { - write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + for (slots = __get_cpu_var(wp_on_reg), i = 0; i < core_num_wrps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } } } @@ -827,7 +886,7 @@ static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, { int cpu = (long)hcpu; if (action == CPU_ONLINE) - smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } @@ -835,6 +894,14 @@ static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif + /* * One-time initialisation. */ @@ -850,8 +917,8 @@ static int __init arch_hw_breakpoint_init(void) * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + smp_call_function(hw_breakpoint_reset, NULL, 1); + hw_breakpoint_reset(NULL); /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -861,6 +928,8 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&hw_breakpoint_reset_nb); + /* Register cpu_suspend hw breakpoint restore hook */ + cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); return 0; } diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 000000000000..92f36835486b --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ + return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_YIELD: + case AARCH64_INSN_HINT_WFE: + case AARCH64_INSN_HINT_WFI: + case AARCH64_INSN_HINT_SEV: + case AARCH64_INSN_HINT_SEVL: + return false; + default: + return true; + } +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ + insn = cpu_to_le32(insn); + return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) + return false; + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_svc(insn) || + aarch64_insn_is_hvc(insn) || + aarch64_insn_is_smc(insn) || + aarch64_insn_is_brk(insn) || + aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. 
+ */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ + return __aarch64_insn_hotpatch_safe(old_insn) && + __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + /* A64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + + ret = aarch64_insn_write(tp, insn); + if (ret == 0) + flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); + + return ret; +} + +struct aarch64_insn_patch { + void **text_addrs; + u32 *new_insns; + int insn_cnt; + atomic_t cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ + int i, ret = 0; + struct aarch64_insn_patch *pp = arg; + + /* The first CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == 1) { + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], + pp->new_insns[i]); + /* + * aarch64_insn_patch_text_nosync() calls flush_icache_range(), + * which ends with "dsb; isb" pair guaranteeing global + * visibility. + */ + atomic_set(&pp->cpu_count, -1); + } else { + while (atomic_read(&pp->cpu_count) != -1) + cpu_relax(); + isb(); + } + + return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ + struct aarch64_insn_patch patch = { + .text_addrs = addrs, + .new_insns = insns, + .insn_cnt = cnt, + .cpu_count = ATOMIC_INIT(0), + }; + + if (cnt <= 0) + return -EINVAL; + + return stop_machine(aarch64_insn_patch_text_cb, &patch, + cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ + int ret; + u32 insn; + + /* Unsafe to patch multiple instructions without synchronizaiton */ + if (cnt == 1) { + ret = aarch64_insn_read(addrs[0], &insn); + if (ret) + return ret; + + if (aarch64_insn_hotpatch_safe(insn, insns[0])) { + /* + * ARMv8 architecture doesn't guarantee all CPUs see + * the new instruction after returning from function + * aarch64_insn_patch_text_nosync(). So send IPIs to + * all other CPUs to achieve instruction + * synchronization. + */ + ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); + kick_all_cpus_sync(); + return ret; + } + } + + return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. 
*/ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type) +{ + u32 insn; + long offset; + + /* + * PC: A 64-bit Program Counter holding the address of the current + * instruction. A64 instructions must be word-aligned. + */ + BUG_ON((pc & 0x3) || (addr & 0x3)); + + /* + * B/BL support [-128M, 128M) offset + * ARM64 virtual address arrangement guarantees all kernel and module + * texts are within +/-128M. + */ + offset = ((long)addr - (long)pc); + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + + if (type == AARCH64_INSN_BRANCH_LINK) + insn = aarch64_insn_get_bl_value(); + else + insn = aarch64_insn_get_b_value(); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 000000000000..263a166291fb --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_ENABLE) { + insn = aarch64_insn_gen_branch_imm(entry->code, + entry->target, + AARCH64_INSN_BRANCH_NOLINK); + } else { + insn = aarch64_insn_gen_nop(); + } + + if (is_static) + aarch64_insn_patch_text_nosync(addr, insn); + else + aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 87542397b3ac..7787208e8cc6 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -74,3 +77,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. 
+ * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // svc #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // svc #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da99..df08a6e0287d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,6 +25,10 @@ #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> +#include <asm/insn.h> + +#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX +#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 void *module_alloc(unsigned long size) { @@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) return 0; } -enum aarch64_imm_type { - INSN_IMM_MOVNZ, - INSN_IMM_MOVK, - INSN_IMM_ADR, - INSN_IMM_26, - INSN_IMM_19, - INSN_IMM_16, - INSN_IMM_14, - INSN_IMM_12, - INSN_IMM_9, -}; - -static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_insn_imm_type imm_type) { - u32 immlo, immhi, lomask, himask, mask; - int shift; + u64 imm, limit = 0; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; - switch (type) { - case INSN_IMM_MOVNZ: + if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* * For signed MOVW relocations, we have to manipulate the * instruction encoding depending on whether or not the @@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) */ imm = ~imm; } - case INSN_IMM_MOVK: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; - case INSN_IMM_26: - mask = BIT(26) - 1; - shift = 0; - break; - case INSN_IMM_19: - mask = BIT(19) - 1; - shift = 5; - break; - case INSN_IMM_16: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_14: - mask = BIT(14) - 1; - shift = 5; - break; - case INSN_IMM_12: - mask = BIT(12) - 1; - shift = 10; - break; - case INSN_IMM_9: - mask = BIT(9) - 1; - shift = 12; - break; - default: - pr_err("encode_insn_immediate: unknown immediate encoding %d\n", - type); - return 0; + imm_type = AARCH64_INSN_IMM_MOVK; } - /* Update the immediate field.
*/ - insn &= ~(mask << shift); - insn |= (imm & mask) << shift; - - return insn; -} - -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_imm_type imm_type) -{ - u64 imm, limit = 0; - s64 sval; - u32 insn = *(u32 *)place; - - sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; - /* Update the instruction with the new encoding. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* Shift out the immediate field. */ sval >>= 16; @@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * For unsigned immediates, the overflow check is straightforward. * For signed immediates, the sign bit is actually the bit past the * most significant bit of the field. - * The INSN_IMM_16 immediate type is unsigned. + * The AARCH64_INSN_IMM_16 immediate type is unsigned. */ - if (imm_type != INSN_IMM_16) { + if (imm_type != AARCH64_INSN_IMM_16) { sval++; limit++; } @@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, int len, enum aarch64_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = *(u32 *)place; + u32 insn = le32_to_cpu(*(u32 *)place); /* Calculate the relocation value. */ sval = do_reloc(op, place, val); @@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, imm = sval & imm_mask; /* Update the instruction's immediate field. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and @@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. 
*/ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; /* Immediate instruction relocations. */ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - INSN_IMM_14); + AARCH64_INSN_IMM_14); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - INSN_IMM_26); + AARCH64_INSN_IMM_26); break; default: diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594ff933..dfcd8fadde3c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@ #include 
<linux/bitmap.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/perf_event.h> @@ -363,26 +364,53 @@ validate_group(struct perf_event *event) } static void +armpmu_disable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + disable_percpu_irq(irq); +} + +static void armpmu_release_hardware(struct arm_pmu *armpmu) { - int i, irq, irqs; + int irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (!irqs) + return; - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) + return; + + if (irq_is_percpu(irq)) { + on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &cpu_hw_events); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq > 0) + free_irq(irq, armpmu); + } } } +static void +armpmu_enable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int i, err, irq, irqs; + int err, irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; if (!pmu_device) { @@ -391,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (!irqs) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) { + pr_err("failed to get valid irq for PMU device\n"); + return -ENODEV; + } - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, armpmu->handle_irq, + "arm-pmu", &cpu_hw_events); - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, - "arm-pmu", armpmu); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); + pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", + irq); armpmu_release_hardware(armpmu); return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq <= 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. 
+ */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } } return 0; @@ -1299,8 +1347,8 @@ early_initcall(init_hw_perf_events); * Callchain handling code. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long lr; } __attribute__((packed)); /* @@ -1337,22 +1385,84 @@ user_backtrace(struct frame_tail __user *tail, return buftail.fp; } +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). 
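+ * The saved fp points just past the next record (see the struct comment above), + * hence the tail + 1 comparison here and the - 1 applied when following fp below.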
+ */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } perf_callchain_store(entry, regs->pc); - tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); +#endif + } } /* @@ -1380,6 +1490,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 000000000000..422ebd63b619 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + return 0; + + /* + * Compat (i.e. 32 bit) mode: + * - PC has been set in the pt_regs struct in kernel_entry, + * - Handle SP and LR here. 
+ */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; + } + + return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (is_compat_thread(task_thread_info(task))) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 57bd961f2917..75c6b1e0d606 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include <stdarg.h> +#include <linux/compat.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -33,6 +34,7 @@ #include <linux/kallsyms.h> #include <linux/init.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -107,8 +109,10 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - cpu_do_idle(); - local_irq_enable(); + if (cpuidle_idle_call()) { + cpu_do_idle(); + local_irq_enable(); + } } #ifdef CONFIG_HOTPLUG_CPU @@ -199,9 +203,27 @@ void exit_thread(void) { } +static void tls_thread_flush(void) +{ + asm ("msr tpidr_el0, xzr"); + + if (is_compat_task()) { + current->thread.tp_value = 0; + + /* + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + */ + barrier(); + asm ("msr tpidrro_el0, xzr"); + } +} + void flush_thread(void) { fpsimd_flush_thread(); + tls_thread_flush(); flush_ptrace_hw_breakpoint(current); } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea4828a4aa96..0e32ab453e5b 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -15,15 +15,18 @@ #define pr_fmt(fmt) "psci: " fmt +#include <linux/cpuidle.h> #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> +#include <linux/slab.h> #include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/psci.h> #include <asm/smp_plat.h> +#include <asm/suspend.h> #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 @@ -54,6 +57,8 @@ enum psci_function { PSCI_FN_MAX, }; +static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state); + static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_SUCCESS 0 @@ -94,6 +99,17 @@ static u32 psci_power_state_pack(struct psci_power_state state) << PSCI_POWER_STATE_AFFL_SHIFT); } +static void psci_power_state_unpack(u32 power_state, + struct psci_power_state *state) +{ + state->id = (power_state >> PSCI_POWER_STATE_ID_SHIFT) + & PSCI_POWER_STATE_ID_MASK; + state->type = (power_state >> PSCI_POWER_STATE_TYPE_SHIFT) + & PSCI_POWER_STATE_TYPE_MASK; + state->affinity_level = (power_state >> PSCI_POWER_STATE_AFFL_SHIFT) + & PSCI_POWER_STATE_AFFL_MASK; +} + /* * The following two functions are invoked via the invoke_psci_fn pointer * and will not be inlined, allowing us to piggyback on the AAPCS. 
@@ -176,6 +192,77 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; +int __init psci_dt_register_idle_states(struct cpuidle_driver *drv, + struct device_node *state_nodes[]) +{ + int cpu, i; + struct psci_power_state *psci_states; + const struct cpu_operations *cpu_ops_ptr; + + if (!state_nodes) + return -EINVAL; + /* + * This is belt-and-braces: make sure that if the idle + * specified protocol is psci, the cpu_ops have been + * initialized to psci operations. Anything else is + * a recipe for mayhem. + */ + for_each_cpu(cpu, drv->cpumask) { + cpu_ops_ptr = cpu_ops[cpu]; + if (WARN_ON(!cpu_ops_ptr || strcmp(cpu_ops_ptr->name, "psci"))) + return -EOPNOTSUPP; + } + + psci_states = kcalloc(drv->state_count, sizeof(*psci_states), + GFP_KERNEL); + + if (!psci_states) { + pr_warn("psci idle state allocation failed\n"); + return -ENOMEM; + } + + for_each_cpu(cpu, drv->cpumask) { + if (per_cpu(psci_power_state, cpu)) { + pr_warn("idle states already initialized on cpu %u\n", + cpu); + continue; + } + per_cpu(psci_power_state, cpu) = psci_states; + } + + + for (i = 0; i < drv->state_count; i++) { + u32 psci_power_state; + + if (!state_nodes[i]) { + /* + * An index with a missing node pointer falls back to + * simple STANDBYWFI + */ + psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY; + continue; + } + + if (of_property_read_u32(state_nodes[i], "entry-method-param", + &psci_power_state)) { + pr_warn(" * %s missing entry-method-param property\n", + state_nodes[i]->full_name); + /* + * If entry-method-param property is missing, fall + * back to STANDBYWFI state + */ + psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY; + continue; + } + + pr_debug("psci-power-state %#x index %u\n", + psci_power_state, i); + psci_power_state_unpack(psci_power_state, &psci_states[i]); + } + + return 0; +} + void __init psci_init(void) { struct device_node *np; @@ -279,6 +366,18 @@ static void cpu_psci_cpu_die(unsigned int cpu) } #endif +#ifdef CONFIG_ARM64_CPU_SUSPEND +static int cpu_psci_cpu_suspend(unsigned long index) +{ + struct psci_power_state *state = __get_cpu_var(psci_power_state); + + if (!state) + return -EOPNOTSUPP; + + return psci_ops.cpu_suspend(state[index], virt_to_phys(cpu_resume)); +} +#endif + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, @@ -288,6 +387,9 @@ const struct cpu_operations cpu_psci_ops = { .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, #endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + .cpu_suspend = cpu_psci_cpu_suspend, +#endif }; #endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c484d5625ffb..bc09a147454f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -41,6 +42,9 @@ #include <asm/traps.h> #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. 
@@ -81,7 +85,8 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { + + for (i = 0; i < ARM_MAX_WRP; ++i) { if (current->thread.debug.hbp_watch[i] == bp) { info.si_errno = -((i << 1) + 1); break; @@ -634,28 +639,27 @@ static int compat_gpr_get(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; switch (idx) { case 15: - reg = (void *)&task_pt_regs(target)->pc; + reg = task_pt_regs(target)->pc; break; case 16: - reg = (void *)&task_pt_regs(target)->pstate; + reg = task_pt_regs(target)->pstate; break; case 17: - reg = (void *)&task_pt_regs(target)->orig_x0; + reg = task_pt_regs(target)->orig_x0; break; default: - reg = (void *)&task_pt_regs(target)->regs[idx]; + reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); - + ret = copy_to_user(ubuf, ®, sizeof(reg)); if (ret) break; - else - ubuf += sizeof(compat_ulong_t); + + ubuf += sizeof(reg); } return ret; @@ -683,28 +687,28 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); switch (idx) { case 15: - reg = (void *)&newregs.pc; + newregs.pc = reg; break; case 16: - reg = (void *)&newregs.pstate; + newregs.pstate = reg; break; case 17: - reg = (void *)&newregs.orig_x0; + newregs.orig_x0 = reg; break; default: - reg = (void *)&newregs.regs[idx]; + newregs.regs[idx] = reg; } - ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - - if (ret) - goto out; - else - ubuf += sizeof(compat_ulong_t); } if (valid_user_regs(&newregs.user_regs)) @@ -712,7 +716,6 @@ static int compat_gpr_set(struct task_struct *target, else ret = -EINVAL; -out: return ret; } @@ -823,6 +826,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -830,10 +834,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } @@ -1060,35 +1067,49 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; - - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 
12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 000000000000..89102a6ffad5 --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5fdfc0255953..e87b5fd07b8c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -41,6 +41,7 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <linux/efi.h> #include <asm/fixmap.h> #include <asm/cputype.h> @@ -55,6 +56,7 @@ #include <asm/traps.h> #include <asm/memblock.h> #include <asm/psci.h> +#include <asm/efi.h> unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -62,6 +64,17 @@ EXPORT_SYMBOL(processor_id); unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + static const char *cpu_name; static const char *machine_name; phys_addr_t __fdt_pointer __initdata; @@ -114,9 +127,79 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) return phys_id == 
cpu_logical_map(cpu); } +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + static void __init setup_processor(void) { struct cpu_info *cpu_info; + u64 features, block; cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { @@ -130,8 +213,71 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. 
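+ * As an illustrative example, an AES field value (bits [7:4]) of 1 advertises + * AES only, 2 advertises AES and PMULL, and any value with bit 3 set is treated + * as reserved and ignored, which is exactly what the switch below implements.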
+ */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -273,11 +419,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + efi_init(); arm64_memblock_init(); paging_init(); request_standard_resources(); + efi_idmap_init(); + unflatten_device_tree(); psci_init(); @@ -286,6 +435,7 @@ void __init setup_arch(char **cmdline_p) cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); + smp_build_mpidr_hash(); #endif #ifdef CONFIG_VT @@ -324,6 +474,12 @@ subsys_initcall(topology_init); static const char *hwcap_str[] = { "fp", "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 7ff2eee96c6b..e3cf09626245 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/personality.h> @@ -25,7 +26,6 @@ #include <linux/tracehook.h> #include <linux/ratelimit.h> -#include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 3edf7f48c54b..e51bbe79f5b5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -100,34 +100,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. 
- */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code. - * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 000000000000..b1925729c692 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff3 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 // mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm +/* + * Save CPU state for a suspend. This saves callee-saved registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size.
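+ * mask is a 64-bit field followed by four 32-bit shift_aff entries, so the two + * ldp loads of w registers below fetch all four affinity-level shifts.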
+ */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. + * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + bl el2_setup // if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . 
+/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6555060f9e97..7c868a2ac38b 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -49,6 +49,9 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> +#define CREATE_TRACE_POINTS +#include <trace/events/arm-ipi.h> + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -61,6 +64,7 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_TIMER, }; /* @@ -113,6 +117,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -147,6 +156,13 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_ops[cpu]->cpu_postboot(); /* + * Enable GIC and timers. + */ + notify_cpu_starting(cpu); + + smp_store_cpu_info(cpu); + + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. @@ -154,11 +170,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - /* - * Enable GIC and timers. - */ - notify_cpu_starting(cpu); - local_dbg_enable(); local_irq_enable(); local_fiq_enable(); @@ -394,6 +405,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int err; unsigned int cpu, ncores = num_possible_cpus(); + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); + /* * are we trying to boot more cores than exist? */ @@ -453,6 +468,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -538,6 +554,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -550,6 +574,13 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index e3e5755f61bb..0347d38eea29 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -71,7 +71,16 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return -ENODEV; release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); + + /* + * We write the release address as LE regardless of the native + * endianess of the kernel. 
Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianess before jumping. This is mandated by + * the boot protocol. + */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); /* diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 38f0558f0c0a..55437ba1f5a4 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 000000000000..1fa9ce4afd8f --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + * must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, + phys_addr_t *save_ptr) +{ + int cpu = smp_processor_id(); + + *save_ptr = virt_to_phys(ptr); + + cpu_do_suspend(ptr); + /* + * Only flush the context that must be retrieved with the MMU + * off. VA primitives ensure the flush is applied to all + * cache levels so context is pushed to DRAM. + */ + __flush_dcache_area(ptr, sizeof(*ptr)); + __flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ + /* Prevent multiple restore hook initializations */ + if (WARN_ON(hw_breakpoint_restore)) + return; + hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ + struct mm_struct *mm = current->active_mm; + int ret, cpu = smp_processor_id(); + unsigned long flags; + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + + /* + * From this point debug exceptions are disabled to prevent + * updates to mdscr register (saved and restored along with + * general purpose registers) from kernel debuggers. 
+ */ + local_dbg_save(flags); + + /* + * mm context saved on the stack, it will be restored when + * the cpu comes out of reset through the identity mapped + * page tables, so that the thread address space is properly + * set-up on function return. + */ + ret = __cpu_suspend(arg); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); + } + + /* + * Restore pstate flags. OS lock and mdscr have been already + * restored, so from this point onwards, debugging is fully + * renabled if it was enabled when core started shutdown. + */ + local_dbg_restore(flags); + + return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ + void *ctx_ptr; + + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sleep_idmap_phys = virt_to_phys(idmap_pg_dir); + __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); + __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + + return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed7467c..423a5b3fc2be 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * extension. */ compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4eeaba8..78039927c807 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *regs) case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; + + /* + * Protect against register corruption from context switch. 
+ * See comment in tls_thread_flush. + */ + barrier(); asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); return 0; diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000000..db8bb29c3852 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,590 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/topology.h> +#include <asm/smp_plat.h> + + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { + of_node_put(cpu_node); + return cpu; + } + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + + of_node_put(cpu_node); + return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return -EINVAL; + } + + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int cluster_id __initdata; + int core_id = 0; + 
int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%s: cpu-map children should be clusters\n", + c->full_name); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, cluster_id, core_id++); + } else { + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; + + return 0; +} + +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + { "arm,cortex-a57", 3891 }, + { "arm,cortex-a53", 2048 }, + { NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. 
+ */ + for_each_possible_cpu(cpu) { + if (cpu_topology[cpu].cluster_id == -1) { + pr_err("CPU%d: No topology information specified\n", + cpu); + ret = -EINVAL; + } + } + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + +static void __init parse_dt_cpu_power(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn; + unsigned long min_capacity = ULONG_MAX; + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int cpu; + + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + + /* Too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("Missing device node for CPU %d\n", cpu); + continue; + } + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) { + pr_warn("%s: Unknown CPU type\n", cn->full_name); + continue; + } + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s: Missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equal we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + return; + else if (4 * max_capacity < (3 * (max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; +} + +/* + * Look for a customed capacity of a CPU in the cpu_topo_data table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +static void update_cpu_power(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + pr_info("CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + if (cpuid_topo->cluster_id == -1) { + /* + * DT does not contain topology information for this cpu. 
+ */ + pr_debug("CPU%u: No topology information configured\n", cpuid); + return; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +#ifdef CONFIG_SCHED_HMP + +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} + +static const char * const little_cores[] = { + "arm,cortex-a53", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) + return true; + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 8) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr+1)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr+1)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. + */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct cpumask hmp_fast_cpu_mask; + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. 
+ */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) { + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + } + + return -EINVAL; +} + +void store_cpu_topology(unsigned int cpuid) +{ + update_siblings_masks(cpuid); + update_cpu_power(cpuid); +} + +static void __init reset_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->cluster_id = -1; + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + } +} + +static void __init reset_cpu_power(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + set_power_scale(cpu, SCHED_POWER_SCALE); +} + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (parse_dt_topology()) + reset_cpu_topology(); + + reset_cpu_power(); + parse_dt_cpu_power(); +} diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 40ba5dea2ed7..84cafbc3eb54 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); |
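The capacity arithmetic that the topology.c hunk above introduces in parse_dt_cpu_power() and update_cpu_power() can be followed with a small standalone sketch. This is illustration only, not kernel code: the CPU list and clock rates are made up, and SCHED_POWER_SHIFT / SCHED_POWER_SCALE are assumed to be 10 and 1024 as in kernels of this vintage; in the patch itself the inputs come from the "clock-frequency" DT property, table_efficiency[] and the scheduler headers.

/*
 * Standalone sketch of the maths in parse_dt_cpu_power() and
 * update_cpu_power().  SCHED_POWER_SHIFT = 10 is an assumption here;
 * clock rates and the CPU mix are hypothetical.
 */
#include <stdio.h>

#define SCHED_POWER_SHIFT	10
#define SCHED_POWER_SCALE	(1UL << SCHED_POWER_SHIFT)
#define NR_SAMPLE_CPUS		4

struct cpu_sample {
	const char *compatible;
	unsigned long clock_hz;		/* would come from "clock-frequency" */
	unsigned long efficiency;	/* would come from table_efficiency[] */
};

int main(void)
{
	/* hypothetical big.LITTLE system: 2x Cortex-A57 + 2x Cortex-A53 */
	struct cpu_sample cpus[NR_SAMPLE_CPUS] = {
		{ "arm,cortex-a57", 1800000000UL, 3891 },
		{ "arm,cortex-a57", 1800000000UL, 3891 },
		{ "arm,cortex-a53", 1300000000UL, 2048 },
		{ "arm,cortex-a53", 1300000000UL, 2048 },
	};
	unsigned long capacity[NR_SAMPLE_CPUS];
	unsigned long min = -1UL, max = 0, middle;
	int i;

	for (i = 0; i < NR_SAMPLE_CPUS; i++) {
		/* capacity = (rate >> 20) * efficiency, as in parse_dt_cpu_power() */
		capacity[i] = (cpus[i].clock_hz >> 20) * cpus[i].efficiency;
		if (capacity[i] < min)
			min = capacity[i];
		if (capacity[i] > max)
			max = capacity[i];
	}

	/*
	 * middle_capacity selection, mirroring the two non-trivial cases in
	 * parse_dt_cpu_power().  (When min == max the kernel returns early
	 * and middle_capacity stays 1, so every cpu_scale is left alone.)
	 */
	if (4 * max < 3 * (max + min))
		middle = (min + max) >> (SCHED_POWER_SHIFT + 1);
	else
		middle = ((max / 3) >> (SCHED_POWER_SHIFT - 1)) + 1;

	for (i = 0; i < NR_SAMPLE_CPUS; i++)
		/* this is the value update_cpu_power() feeds to set_power_scale() */
		printf("%s: cpu_power = %lu (SCHED_POWER_SCALE = %lu)\n",
		       cpus[i].compatible, capacity[i] / middle,
		       SCHED_POWER_SCALE);

	return 0;
}

With these assumed numbers the A57s land above SCHED_POWER_SCALE and the A53s below it, which is the intent of the two-branch middle_capacity choice: the 4*max < 3*(max+min) test picks between averaging min and max and taking max/3, so that an "average" CPU ends up with a cpu_power close to the default SCHED_POWER_SCALE, within the 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 constraint described next to table_efficiency[].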