diff options
Diffstat (limited to 'arch/arm64/kernel')
46 files changed, 4864 insertions, 835 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..a8ad571c4758 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,20 +4,35 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ + -I$(src)/../../../scripts/dtc/libfdt + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o +arm64-obj-$(CONFIG_SMP) += topology.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o +arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f626d554..a85843ddbde8 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,16 +29,14 @@ #include <asm/checksum.h> - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); @@ -46,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr); /* string / mem functions */ EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); /* atomic bitops */ EXPORT_SYMBOL(set_bit); @@ -58,3 +61,7 @@ EXPORT_SYMBOL(clear_bit); EXPORT_SYMBOL(test_and_clear_bit); EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea3..9a9fce090d58 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,9 +21,12 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/kvm_host.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> @@ -104,5 +107,59 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); + DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); + DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); + DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); + DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); + DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); + DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); + DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); + DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); + DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); + DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); + DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); + DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 000000000000..24fb449bc6aa --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,99 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, +#endif + &cpu_psci_ops, + NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c new file mode 100644 index 000000000000..19d17f51db37 --- /dev/null +++ b/arch/arm64/kernel/cpuidle.c @@ -0,0 +1,31 @@ +/* + * ARM64 CPU idle arch support + * + * Copyright (C) 2014 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/of.h> +#include <linux/of_device.h> + +#include <asm/cpuidle.h> +#include <asm/cpu_ops.h> + +int cpu_init_idle(unsigned int cpu) +{ + int ret = -EOPNOTSUPP; + struct device_node *cpu_node = of_cpu_device_node_get(cpu); + + if (!cpu_node) + return -ENODEV; + + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) + ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + return ret; +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4e..fd3993cb060f 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..e3b37ee9076a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,21 +24,13 @@ #include <linux/init.h> #include <linux/ptrace.h> #include <linux/stat.h> +#include <linux/uaccess.h> #include <asm/debug-monitors.h> #include <asm/local.h> #include <asm/cputype.h> #include <asm/system_misc.h> -/* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) -#define DBG_SPSR_SS (1 << 21) - -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - /* Determine debug architecture. */ u8 debug_monitors_arch(void) { @@ -137,7 +129,6 @@ void disable_debug_monitors(enum debug_el el) static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); - isb(); } static int __cpuinit os_lock_notify(struct notifier_block *self, @@ -156,8 +147,9 @@ static struct notifier_block __cpuinitdata os_lock_nb = { static int __cpuinit debug_monitors_init(void) { /* Clear the OS lock. */ - smp_call_function(clear_os_lock, NULL, 1); - clear_os_lock(NULL); + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); /* Register hotplug handler. */ register_cpu_notifier(&os_lock_nb); @@ -187,6 +179,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) regs->pstate = spsr; } +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +static DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_add(&hook->node, &step_hook); + write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_del(&hook->node); + write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ + struct step_hook *hook; + int retval = DBG_HOOK_ERROR; + + read_lock(&step_hook_lock); + + list_for_each_entry(hook, &step_hook, node) { + retval = hook->fn(regs, esr); + if (retval == DBG_HOOK_HANDLED) + break; + } + + read_unlock(&step_hook_lock); + + return retval; +} + static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -214,7 +248,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { - /* TODO: route to KGDB */ + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + pr_warning("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be @@ -226,13 +262,113 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +static DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_add(&hook->node, &break_hook); + write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_del(&hook->node); + write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ + struct break_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + + read_lock(&break_hook_lock); + list_for_each_entry(hook, &break_hook, node) + if ((esr & hook->esr_mask) == hook->esr_val) + fn = hook->fn; + read_unlock(&break_hook_lock); + + return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); + return -EFAULT; + } + + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index fbb6e1843659..ffbbdde7aba1 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -26,6 +26,8 @@ #include <linux/amba/serial.h> #include <linux/serial_reg.h> +#include <asm/fixmap.h> + static void __iomem *early_base; static void (*printch)(char ch); @@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf) } /* no options parsing yet */ - if (paddr) - early_base = early_io_map(paddr, EARLYCON_IOBASE); + if (paddr) { + set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); + early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); + } printch = match->printch; early_console = &early_console_dev; diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 000000000000..66716c9b9e5f --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,109 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + + __INIT + + /* + * We arrive here from the EFI boot manager with: + * + * * CPU in little-endian mode + * * MMU on with identity-mapped RAM + * * Icache and Dcache on + * + * We will most likely be running from some place other than where + * we want to be. The kernel image wants to be placed at TEXT_OFFSET + * from start of RAM. + */ +ENTRY(efi_stub_entry) + /* + * Create a stack frame to save FP/LR with extra space + * for image_addr variable passed to efi_entry(). + */ + stp x29, x30, [sp, #-32]! + + /* + * Call efi_entry to do the real work. + * x0 and x1 are already set up by firmware. Current runtime + * address of image is calculated and passed via *image_addr. + * + * unsigned long efi_entry(void *handle, + * efi_system_table_t *sys_table, + * unsigned long *image_addr) ; + */ + adrp x8, _text + add x8, x8, #:lo12:_text + add x2, sp, 16 + str x8, [x2] + bl efi_entry + cmn x0, #1 + b.eq efi_load_fail + + /* + * efi_entry() will have relocated the kernel image if necessary + * and we return here with device tree address in x0 and the kernel + * entry point stored at *image_addr. Save those values in registers + * which are callee preserved. + */ + mov x20, x0 // DTB address + ldr x0, [sp, #16] // relocated _text address + mov x21, x0 + + /* + * Flush dcache covering current runtime addresses + * of kernel text/data. Then flush all of icache. + */ + adrp x1, _text + add x1, x1, #:lo12:_text + adrp x2, _edata + add x2, x2, #:lo12:_edata + sub x1, x2, x1 + + bl __flush_dcache_area + ic ialluis + + /* Turn off Dcache and MMU */ + mrs x0, CurrentEL + cmp x0, #PSR_MODE_EL2t + ccmp x0, #PSR_MODE_EL2h, #0x4, ne + b.ne 1f + mrs x0, sctlr_el2 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el2, x0 + isb + b 2f +1: + mrs x0, sctlr_el1 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb +2: + /* Jump to kernel entry point */ + mov x0, x20 + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x21 + +efi_load_fail: + mov x0, #EFI_LOAD_ERROR + ldp x29, x30, [sp], #32 + ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 000000000000..e786e6cdc400 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/efi.h> +#include <linux/libfdt.h> +#include <asm/sections.h> + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, + efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, + void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + status = efi_relocate_kernel(sys_table, image_addr, + kernel_size, kernel_memsize, + dram_base + TEXT_OFFSET, + PAGE_SIZE); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + return status; + } + if (*image_addr != (dram_base + TEXT_OFFSET)) { + pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); + efi_free(sys_table, kernel_memsize, *image_addr); + return EFI_ERROR; + } + *image_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 000000000000..14db1f6e8d7f --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,469 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ + uefi_debug = 1; + + return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ + if (md->attribute & EFI_MEMORY_WB) + return 1; + return 0; +} + +static void __init efi_setup_idmap(void) +{ + struct memblock_region *r; + efi_memory_desc_t *md; + u64 paddr, npages, size; + + for_each_memblock(memory, r) + create_id_mapping(r->base, r->size, 0); + + /* map runtime io spaces */ + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + continue; + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + create_id_mapping(paddr, size, 1); + } +} + +static int __init uefi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i, retval; + + efi.systab = early_memremap(efi_system_table, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) { + pr_warn("Unable to map EFI system table.\n"); + return -ENOMEM; + } + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) < 2) + pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = early_memremap(efi.systab->fw_vendor, + sizeof(vendor)); + if (c16) { + for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + retval = efi_config_init(NULL); + if (retval == 0) + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + early_memunmap(c16, sizeof(vendor)); + early_memunmap(efi.systab, sizeof(efi_system_table_t)); + + return retval; +} + +static __initdata char memory_type_name[][32] = { + {"Reserved"}, + {"Loader Code"}, + {"Loader Data"}, + {"Boot Code"}, + {"Boot Data"}, + {"Runtime Code"}, + {"Runtime Data"}, + {"Conventional Memory"}, + {"Unusable Memory"}, + {"ACPI Reclaim Memory"}, + {"ACPI Memory NVS"}, + {"Memory Mapped I/O"}, + {"MMIO Port Space"}, + {"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. + */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ + if (!is_normal_ram(md)) + return 0; + + if (md->attribute & EFI_MEMORY_RUNTIME) + return 1; + + if (md->type == EFI_ACPI_RECLAIM_MEMORY || + md->type == EFI_RESERVED_TYPE) + return 1; + + return 0; +} + +static __init void reserve_regions(void) +{ + efi_memory_desc_t *md; + u64 paddr, npages, size; + + if (uefi_debug) + pr_info("Processing EFI memory map:\n"); + + for_each_efi_memory_desc(&memmap, md) { + paddr = md->phys_addr; + npages = md->num_pages; + + if (uefi_debug) + pr_info(" 0x%012llx-0x%012llx [%s]", + paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, + memory_type_name[md->type]); + + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + early_init_dt_add_memory_arch(paddr, size); + + if (is_reserve_region(md) || + md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) { + memblock_reserve(paddr, size); + if (uefi_debug) + pr_cont("*"); + } + + if (uefi_debug) + pr_cont("\n"); + } +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ + u64 size = end - start; + + if (uefi_debug) + pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); + + free_bootmem_late(start, size); + return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ + u64 map_start, map_end, total = 0; + + if (end <= start) + return total; + + map_start = (u64)memmap.phys_map; + map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); + map_start &= PAGE_MASK; + + if (start < map_end && end > map_start) { + /* region overlaps UEFI memmap */ + if (start < map_start) + total += free_one_region(start, map_start); + + if (map_end < end) + total += free_one_region(map_end, end); + } else + total += free_one_region(start, end); + + return total; +} + +static void __init free_boot_services(void) +{ + u64 total_freed = 0; + u64 keep_end, free_start, free_end; + efi_memory_desc_t *md; + + /* + * If kernel uses larger pages than UEFI, we have to be careful + * not to inadvertantly free memory we want to keep if there is + * overlap at the kernel page size alignment. We do not want to + * free is_reserve_region() memory nor the UEFI memmap itself. + * + * The memory map is sorted, so we keep track of the end of + * any previous region we want to keep, remember any region + * we want to free and defer freeing it until we encounter + * the next region we want to keep. This way, before freeing + * it, we can clip it as needed to avoid freeing memory we + * want to keep for UEFI. + */ + + keep_end = 0; + free_start = 0; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + + if (is_reserve_region(md)) { + /* + * We don't want to free any memory from this region. + */ + if (free_start) { + /* adjust free_end then free region */ + if (free_end > md->phys_addr) + free_end -= PAGE_SIZE; + total_freed += free_region(free_start, free_end); + free_start = 0; + } + keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + continue; + } + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + /* no need to free this region */ + continue; + } + + /* + * We want to free memory from this region. + */ + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (free_start) { + if (paddr <= free_end) + free_end = paddr + size; + else { + total_freed += free_region(free_start, free_end); + free_start = paddr; + free_end = paddr + size; + } + } else { + free_start = paddr; + free_end = paddr + size; + } + if (free_start < keep_end) { + free_start += PAGE_SIZE; + if (free_start >= free_end) + free_start = 0; + } + } + if (free_start) + total_freed += free_region(free_start, free_end); + + if (total_freed) + pr_info("Freed 0x%llx bytes of EFI boot services memory", + total_freed); +} + +void __init efi_init(void) +{ + struct efi_fdt_params params; + + /* Grab UEFI information placed in FDT by stub */ + if (!efi_get_fdt_params(¶ms, uefi_debug)) + return; + + efi_system_table = params.system_table; + + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); + memmap.phys_map = (void *)params.mmap; + memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map_end = memmap.map + params.mmap_size; + memmap.desc_size = params.desc_size; + memmap.desc_version = params.desc_ver; + + if (uefi_init() < 0) + return; + + reserve_regions(); +} + +void __init efi_idmap_init(void) +{ + if (!efi_enabled(EFI_BOOT)) + return; + + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ + efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ + u64 paddr, vaddr, npages, size; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + vaddr = (__force u64)ioremap_cache(paddr, size); + else + vaddr = (__force u64)ioremap(paddr, size); + + if (!vaddr) { + pr_err("Unable to remap 0x%llx pages @ %p\n", + npages, (void *)paddr); + return 0; + } + + /* adjust for any rounding when EFI and system pagesize differs */ + md->virt_addr = vaddr + (md->phys_addr - paddr); + + if (uefi_debug) + pr_info(" EFI remap 0x%012llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + memcpy(*new, md, memmap.desc_size); + *new += memmap.desc_size; + + return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + phys_addr_t virtmap_phys; + void *virtmap, *virt_md; + efi_status_t status; + u64 mapsize; + int count = 0; + unsigned long flags; + + if (!efi_enabled(EFI_BOOT)) { + pr_info("EFI services will not be available.\n"); + return -1; + } + + pr_info("Remapping and enabling EFI services.\n"); + + /* replace early memmap mapping with permanent mapping */ + mapsize = memmap.map_end - memmap.map; + early_memunmap(memmap.map, mapsize); + memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + mapsize); + memmap.map_end = memmap.map + mapsize; + + efi.memmap = &memmap; + + /* Map the runtime regions */ + virtmap = kmalloc(mapsize, GFP_KERNEL); + if (!virtmap) { + pr_err("Failed to allocate EFI virtual memmap\n"); + return -1; + } + virtmap_phys = virt_to_phys(virtmap); + virt_md = virtmap; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (remap_region(md, &virt_md)) + ++count; + } + + efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + if (efi.systab) + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + local_irq_save(flags); + cpu_switch_mm(idmap_pg_dir, &init_mm); + + /* Call SetVirtualAddressMap with the physical address of the map */ + runtime = efi.systab->runtime; + efi.set_virtual_address_map = runtime->set_virtual_address_map; + + status = efi.set_virtual_address_map(count * memmap.desc_size, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)virtmap_phys); + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + local_irq_restore(flags); + + kfree(virtmap); + + free_boot_services(); + + if (status != EFI_SUCCESS) { + pr_err("Failed to set EFI virtual address map! [%lx]\n", + status); + return -1; + } + + /* Set up runtime services function pointers */ + runtime = efi.systab->runtime; + efi.get_time = runtime->get_time; + efi.set_time = runtime->set_time; + efi.get_wakeup_time = runtime->get_wakeup_time; + efi.set_wakeup_time = runtime->set_wakeup_time; + efi.get_variable = runtime->get_variable; + efi.get_next_variable = runtime->get_next_variable; + efi.set_variable = runtime->set_variable; + efi.query_variable_info = runtime->query_variable_info; + efi.update_capsule = runtime->update_capsule; + efi.query_capsule_caps = runtime->query_capsule_caps; + efi.get_next_high_mono_count = runtime->get_next_high_mono_count; + efi.reset_system = runtime->reset_system; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 000000000000..b051871f2965 --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + * mov x0, x30 + * bl _mcount + * [function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp => 0:+-----+ + * in _mcount() | x29 | -> instrumented function's fp + * +-----+ + * | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp => +16:+-----+ + * when instrumented | | + * function calls | ... | + * _mcount() | | + * | | + * instrumented => +xx:+-----+ + * function's fp | x29 | -> parent's fp + * +-----+ + * | x30 | -> instrumented function's lr (= parent's pc) + * +-----+ + * | ... | + */ + + .macro mcount_enter + stp x29, x30, [sp, #-16]! + mov x29, sp + .endm + + .macro mcount_exit + ldp x29, x30, [sp], #16 + ret + .endm + + .macro mcount_adjust_addr rd, rn + sub \rd, \rn, #AARCH64_INSN_SIZE + .endm + + /* for instrumented function's parent */ + .macro mcount_get_parent_fp reg + ldr \reg, [x29] + ldr \reg, [\reg] + .endm + + /* for instrumented function */ + .macro mcount_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + + .macro mcount_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr reg + ldr \reg, [x29] + ldr \reg, [\reg, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr_addr reg + ldr \reg, [x29] + add \reg, \reg, #8 + .endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ldr x0, =ftrace_trace_stop + ldr x0, [x0] // if ftrace_trace_stop + ret // return; +#endif + mcount_enter + + ldr x0, =ftrace_trace_function + ldr x2, [x0] + adr x0, ftrace_stub + cmp x0, x2 // if (ftrace_trace_function + b.eq skip_ftrace_call // != ftrace_stub) { + + mcount_get_pc x0 // function's pc + mcount_get_lr x1 // function's lr (= parent's pc) + blr x2 // (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call: // return; + mcount_exit // } +#else + mcount_exit // return; + // } +skip_ftrace_call: + ldr x1, =ftrace_graph_return + ldr x2, [x1] // if ((ftrace_graph_return + cmp x0, x2 // != ftrace_stub) + b.ne ftrace_graph_caller + + ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry + ldr x2, [x1] // != ftrace_graph_entry_stub)) + ldr x0, =ftrace_graph_entry_stub + cmp x0, x2 + b.ne ftrace_graph_caller // ftrace_graph_caller(); + + mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) + ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) + mcount_enter + + mcount_get_pc0 x0 // function's pc + mcount_get_lr x1 // function's lr + + .global ftrace_call +ftrace_call: // tracer(pc, lr); + nop // This will be replaced with "bl xxx" + // where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) + mcount_get_lr_addr x0 // pointer to function's saved lr + mcount_get_pc x1 // function's pc + mcount_get_parent_fp x2 // parent's fp + bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp) + + mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) + str x0, [sp, #-16]! + mov x0, x29 // parent's fp + bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); + mov x30, x0 // restore the original return address + ldr x0, [sp], #16 + ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7cd589ebca2a..56ef569b2b62 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -287,6 +287,8 @@ el1_dbg: /* * Debug exception handling */ + cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 mov x2, sp // struct pt_regs @@ -310,14 +312,14 @@ el1_irq: #endif #ifdef CONFIG_PREEMPT get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -422,6 +424,7 @@ el0_da: * Data abort handling */ mrs x0, far_el1 + bic x0, x0, #(0xff << 56) disable_step x1 isb enable_dbg @@ -475,6 +478,8 @@ el0_undef: * Undefined instruction */ mov x0, sp + // enable interrupts before calling the main handler + enable_irq b do_undefinstr el0_dbg: /* @@ -505,15 +510,15 @@ el0_irq_naked: #endif get_thread_info tsk #ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 b.eq 1f mov x1, #0 str x1, [x1] // BUG @@ -640,8 +645,9 @@ el0_svc_naked: // compat entry point enable_irq get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys @@ -657,9 +663,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -674,9 +679,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fa308e4a1fa..522df9c7f3a4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> @@ -85,6 +86,66 @@ void fpsimd_flush_thread(void) preempt_enable(); } +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin(void) +{ + /* Avoid using the NEON in interrupt context */ + BUG_ON(in_interrupt()); + preempt_disable(); + + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); +} +EXPORT_SYMBOL(kernel_neon_begin); + +void kernel_neon_end(void) +{ + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + + preempt_enable(); +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + /* * FP/SIMD support code initialisation. */ @@ -103,6 +164,8 @@ static int __init fpsimd_init(void) else elf_hwcap |= HWCAP_ASIMD; + fpsimd_pm_init(); + return 0; } late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 000000000000..649890a3ac4e --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,177 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). + */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..6254f4a28eed 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,10 +22,12 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> #include <asm/cputype.h> #include <asm/memory.h> #include <asm/thread_info.h> @@ -34,29 +36,21 @@ #include <asm/page.h> #include <asm/virt.h> -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xfff) != 0 +#error TEXT_OFFSET must be at least 4KB aligned +#elif (PAGE_OFFSET & 0x1fffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0x1fffff +#error TEXT_OFFSET must be less than 2MB #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES @@ -107,16 +101,137 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved +#endif .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else + .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. + */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -142,21 +257,30 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 - b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -165,6 +289,23 @@ ENTRY(el2_setup) msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 @@ -173,7 +314,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -196,28 +338,41 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line + ret +ENDPROC(set_cpu_boot_mode_flag) + +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data + .pushsection .data..cacheline_aligned ENTRY(__boot_cpu_mode) + .align L1_CACHE_SHIFT .long BOOT_CPU_MODE_EL2 .long 0 .popsection - .align 3 -2: .quad . - .quad PAGE_OFFSET - #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -227,8 +382,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 @@ -242,7 +398,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* @@ -254,7 +419,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? - pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -296,8 +461,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb @@ -340,26 +510,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -368,10 +530,19 @@ ENDPROC(__calc_phys_offset) * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has * been enabled, including the FDT blob (TTBR1) - * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range /* * Clear the idmap and swapper page tables. @@ -391,9 +562,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -401,7 +576,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -421,15 +596,16 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: -#ifdef CONFIG_EARLY_PRINTK /* - * Create the pgd entry for the UART mapping. The full mapping is done - * later based earlyprintk kernel parameter. + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. */ - ldr x5, =EARLYCON_IOBASE // UART virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 -#endif + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 ret ENDPROC(__create_page_tables) .ltorg @@ -438,10 +614,8 @@ ENDPROC(__create_page_tables) .type __switch_data, %object __switch_data: .quad __mmap_switched - .quad __data_loc // x4 - .quad _data // x5 .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 @@ -454,15 +628,7 @@ __switch_data: __mmap_switched: adr x3, __switch_data + 8 - ldp x4, x5, [x3], #16 ldp x6, x7, [x3], #16 - cmp x4, x5 // Copy data segment if needed -1: ccmp x5, x6, #4, ne - b.eq 2f - ldr x16, [x4], #8 - str x16, [x5], #8 - b 1b -2: 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 5ab825c59db9..6de3460ede4c 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,14 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -169,15 +170,68 @@ static enum debug_el debug_exception_level(int privilege) } } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { + HW_BREAKPOINT_INSTALL, + HW_BREAKPOINT_UNINSTALL, + HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + * operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + * slot index on success + * -ENOSPC if no slot is available/matches + * -EINVAL on wrong operations parameter */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, + struct perf_event *bp, + enum hw_breakpoint_ops ops) +{ + int i; + struct perf_event **slot; + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + if (!*slot) { + *slot = bp; + return i; + } + break; + case HW_BREAKPOINT_UNINSTALL: + if (*slot == bp) { + *slot = NULL; + return i; + } + break; + case HW_BREAKPOINT_RESTORE: + if (*slot == bp) + return i; + break; + default: + pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); + return -EINVAL; + } + } + return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, + enum hw_breakpoint_ops ops) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; + struct perf_event **slots; struct debug_info *debug_info = ¤t->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; + enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { @@ -196,67 +250,54 @@ int arch_install_hw_breakpoint(struct perf_event *bp) reg_enable = !debug_info->wps_disabled; } - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return -ENOSPC; + i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); - /* Ensure debug monitors are enabled at the correct exception level. */ - enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) + return i; - /* Setup the address register. */ - write_wb_reg(val_reg, i, info->address); + switch (ops) { + case HW_BREAKPOINT_INSTALL: + /* + * Ensure debug monitors are enabled at the correct exception + * level. + */ + enable_debug_monitors(dbg_el); + /* Fall through */ + case HW_BREAKPOINT_RESTORE: + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, + reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + break; + case HW_BREAKPOINT_UNINSTALL: + /* Reset the control register. */ + write_wb_reg(ctrl_reg, i, 0); - /* Setup the control register. */ - ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + /* + * Release the debug monitors for the correct exception + * level. + */ + disable_debug_monitors(dbg_el); + break; + } return 0; } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; - int i, max_slots, base; - - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - /* Breakpoint */ - base = AARCH64_DBG_REG_BCR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps; - } else { - /* Watchpoint */ - base = AARCH64_DBG_REG_WCR; - slots = __get_cpu_var(wp_on_reg); - max_slots = core_num_wrps; - } - - /* Remove the breakpoint. */ - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return; - - /* Reset the control register. */ - write_wb_reg(base, i, 0); + return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} - /* Release the debug monitors for the correct exception level. */ - disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); } static int get_hbp_len(u8 hbp_len) @@ -806,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused) { int i; - - for (i = 0; i < core_num_brps; ++i) { - write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + struct perf_event **slots; + /* + * When a CPU goes through cold-boot, it does not have any installed + * slot, so it is safe to share the same function for restoring and + * resetting breakpoints; when a CPU is hotplugged in, it goes + * through the slots, which are all empty, hence it just resets control + * and value for debug registers. + * When this function is triggered on warm-boot through a CPU PM + * notifier some slots might be initialized; if so they are + * reprogrammed according to the debug slots content. + */ + for (slots = __get_cpu_var(bp_on_reg), i = 0; i < core_num_brps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } } - for (i = 0; i < core_num_wrps; ++i) { - write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + for (slots = __get_cpu_var(wp_on_reg), i = 0; i < core_num_wrps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } } } @@ -827,7 +886,7 @@ static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, { int cpu = (long)hcpu; if (action == CPU_ONLINE) - smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } @@ -835,6 +894,14 @@ static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif + /* * One-time initialisation. */ @@ -850,8 +917,8 @@ static int __init arch_hw_breakpoint_init(void) * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + smp_call_function(hw_breakpoint_reset, NULL, 1); + hw_breakpoint_reset(NULL); /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -861,6 +928,8 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&hw_breakpoint_reset_nb); + /* Register cpu_suspend hw breakpoint restore hook */ + cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); return 0; } diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 000000000000..92f36835486b --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ + return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_YIELD: + case AARCH64_INSN_HINT_WFE: + case AARCH64_INSN_HINT_WFI: + case AARCH64_INSN_HINT_SEV: + case AARCH64_INSN_HINT_SEVL: + return false; + default: + return true; + } +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ + insn = cpu_to_le32(insn); + return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) + return false; + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_svc(insn) || + aarch64_insn_is_hvc(insn) || + aarch64_insn_is_smc(insn) || + aarch64_insn_is_brk(insn) || + aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. + */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ + return __aarch64_insn_hotpatch_safe(old_insn) && + __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + /* A64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + + ret = aarch64_insn_write(tp, insn); + if (ret == 0) + flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); + + return ret; +} + +struct aarch64_insn_patch { + void **text_addrs; + u32 *new_insns; + int insn_cnt; + atomic_t cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ + int i, ret = 0; + struct aarch64_insn_patch *pp = arg; + + /* The first CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == 1) { + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], + pp->new_insns[i]); + /* + * aarch64_insn_patch_text_nosync() calls flush_icache_range(), + * which ends with "dsb; isb" pair guaranteeing global + * visibility. + */ + atomic_set(&pp->cpu_count, -1); + } else { + while (atomic_read(&pp->cpu_count) != -1) + cpu_relax(); + isb(); + } + + return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ + struct aarch64_insn_patch patch = { + .text_addrs = addrs, + .new_insns = insns, + .insn_cnt = cnt, + .cpu_count = ATOMIC_INIT(0), + }; + + if (cnt <= 0) + return -EINVAL; + + return stop_machine(aarch64_insn_patch_text_cb, &patch, + cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ + int ret; + u32 insn; + + /* Unsafe to patch multiple instructions without synchronizaiton */ + if (cnt == 1) { + ret = aarch64_insn_read(addrs[0], &insn); + if (ret) + return ret; + + if (aarch64_insn_hotpatch_safe(insn, insns[0])) { + /* + * ARMv8 architecture doesn't guarantee all CPUs see + * the new instruction after returning from function + * aarch64_insn_patch_text_nosync(). So send IPIs to + * all other CPUs to achieve instruction + * synchronization. + */ + ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); + kick_all_cpus_sync(); + return ret; + } + } + + return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. */ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type) +{ + u32 insn; + long offset; + + /* + * PC: A 64-bit Program Counter holding the address of the current + * instruction. A64 instructions must be word-aligned. + */ + BUG_ON((pc & 0x3) || (addr & 0x3)); + + /* + * B/BL support [-128M, 128M) offset + * ARM64 virtual address arrangement guarantees all kernel and module + * texts are within +/-128M. + */ + offset = ((long)addr - (long)pc); + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + + if (type == AARCH64_INSN_BRANCH_LINK) + insn = aarch64_insn_get_bl_value(); + else + insn = aarch64_insn_get_b_value(); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354292ed..dfa6e3e74fdd 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,64 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 000000000000..263a166291fb --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_ENABLE) { + insn = aarch64_insn_gen_branch_imm(entry->code, + entry->target, + AARCH64_INSN_BRANCH_NOLINK); + } else { + insn = aarch64_insn_gen_nop(); + } + + if (is_static) + aarch64_insn_patch_text_nosync(addr, insn); + else + aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 000000000000..75c9cf1aafee --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <asm/traps.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "x0", 8, offsetof(struct pt_regs, regs[0])}, + { "x1", 8, offsetof(struct pt_regs, regs[1])}, + { "x2", 8, offsetof(struct pt_regs, regs[2])}, + { "x3", 8, offsetof(struct pt_regs, regs[3])}, + { "x4", 8, offsetof(struct pt_regs, regs[4])}, + { "x5", 8, offsetof(struct pt_regs, regs[5])}, + { "x6", 8, offsetof(struct pt_regs, regs[6])}, + { "x7", 8, offsetof(struct pt_regs, regs[7])}, + { "x8", 8, offsetof(struct pt_regs, regs[8])}, + { "x9", 8, offsetof(struct pt_regs, regs[9])}, + { "x10", 8, offsetof(struct pt_regs, regs[10])}, + { "x11", 8, offsetof(struct pt_regs, regs[11])}, + { "x12", 8, offsetof(struct pt_regs, regs[12])}, + { "x13", 8, offsetof(struct pt_regs, regs[13])}, + { "x14", 8, offsetof(struct pt_regs, regs[14])}, + { "x15", 8, offsetof(struct pt_regs, regs[15])}, + { "x16", 8, offsetof(struct pt_regs, regs[16])}, + { "x17", 8, offsetof(struct pt_regs, regs[17])}, + { "x18", 8, offsetof(struct pt_regs, regs[18])}, + { "x19", 8, offsetof(struct pt_regs, regs[19])}, + { "x20", 8, offsetof(struct pt_regs, regs[20])}, + { "x21", 8, offsetof(struct pt_regs, regs[21])}, + { "x22", 8, offsetof(struct pt_regs, regs[22])}, + { "x23", 8, offsetof(struct pt_regs, regs[23])}, + { "x24", 8, offsetof(struct pt_regs, regs[24])}, + { "x25", 8, offsetof(struct pt_regs, regs[25])}, + { "x26", 8, offsetof(struct pt_regs, regs[26])}, + { "x27", 8, offsetof(struct pt_regs, regs[27])}, + { "x28", 8, offsetof(struct pt_regs, regs[28])}, + { "x29", 8, offsetof(struct pt_regs, regs[29])}, + { "x30", 8, offsetof(struct pt_regs, regs[30])}, + { "sp", 8, offsetof(struct pt_regs, sp)}, + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "pstate", 8, offsetof(struct pt_regs, pstate)}, + { "v0", 16, -1 }, + { "v1", 16, -1 }, + { "v2", 16, -1 }, + { "v3", 16, -1 }, + { "v4", 16, -1 }, + { "v5", 16, -1 }, + { "v6", 16, -1 }, + { "v7", 16, -1 }, + { "v8", 16, -1 }, + { "v9", 16, -1 }, + { "v10", 16, -1 }, + { "v11", 16, -1 }, + { "v12", 16, -1 }, + { "v13", 16, -1 }, + { "v14", 16, -1 }, + { "v15", 16, -1 }, + { "v16", 16, -1 }, + { "v17", 16, -1 }, + { "v18", 16, -1 }, + { "v19", 16, -1 }, + { "v20", 16, -1 }, + { "v21", 16, -1 }, + { "v22", 16, -1 }, + { "v23", 16, -1 }, + { "v24", 16, -1 }, + { "v25", 16, -1 }, + { "v26", 16, -1 }, + { "v27", 16, -1 }, + { "v28", 16, -1 }, + { "v29", 16, -1 }, + { "v30", 16, -1 }, + { "v31", 16, -1 }, + { "fpsr", 4, -1 }, + { "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + struct pt_regs *thread_regs; + + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + thread_regs = task_pt_regs(task); + memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static int compiled_break; + +static void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(regs, regs->pc + 4); + + compiled_break = 0; +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + int err; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + /* + * Packet D (Detach), k (kill). No special handling + * is required here. Handle same as c packet. + */ + case 'c': + /* + * Packet c (Continue) to continue executing. + * Set pc to required address. + * Try to read optional parameter and set pc. + * If this was a compiled breakpoint, we need to move + * to the next instruction else we will just breakpoint + * over and over again. + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, -1); + kgdb_single_step = 0; + + /* + * Received continue command, disable single step + */ + if (kernel_active_single_step()) + kernel_disable_single_step(); + + err = 0; + break; + case 's': + /* + * Update step address value with address passed + * with step packet. + * On debug exception return PC is copied to ELR + * So just update PC. + * If no step address is passed, resume from the address + * pointed by PC. Do not update PC + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + kgdb_single_step = 1; + + /* + * Enable single step handling + */ + if (!kernel_active_single_step()) + kernel_enable_single_step(linux_regs); + err = 0; + break; + default: + err = -1; + } + return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static struct break_hook kgdb_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), + .fn = kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), + .fn = kgdb_compiled_brk_fn +}; + +static struct step_hook kgdb_step_hook = { + .fn = kgdb_step_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + /* + * Want to be lowest priority + */ + .priority = -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + + register_break_hook(&kgdb_brkpt_hook); + register_break_hook(&kgdb_compiled_brkpt_hook); + register_step_hook(&kgdb_step_hook); + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_break_hook(&kgdb_brkpt_hook); + unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_step_hook(&kgdb_step_hook); + unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { + KGDB_DYN_BRK_INS_BYTE0, + KGDB_DYN_BRK_INS_BYTE1, + KGDB_DYN_BRK_INS_BYTE2, + KGDB_DYN_BRK_INS_BYTE3, + } +}; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..7787208e8cc6 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -35,33 +38,32 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 @@ -75,3 +77,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da99..df08a6e0287d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,6 +25,10 @@ #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> +#include <asm/insn.h> + +#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX +#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 void *module_alloc(unsigned long size) { @@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) return 0; } -enum aarch64_imm_type { - INSN_IMM_MOVNZ, - INSN_IMM_MOVK, - INSN_IMM_ADR, - INSN_IMM_26, - INSN_IMM_19, - INSN_IMM_16, - INSN_IMM_14, - INSN_IMM_12, - INSN_IMM_9, -}; - -static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_insn_imm_type imm_type) { - u32 immlo, immhi, lomask, himask, mask; - int shift; + u64 imm, limit = 0; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; - switch (type) { - case INSN_IMM_MOVNZ: + if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* * For signed MOVW relocations, we have to manipulate the * instruction encoding depending on whether or not the @@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) */ imm = ~imm; } - case INSN_IMM_MOVK: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; - case INSN_IMM_26: - mask = BIT(26) - 1; - shift = 0; - break; - case INSN_IMM_19: - mask = BIT(19) - 1; - shift = 5; - break; - case INSN_IMM_16: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_14: - mask = BIT(14) - 1; - shift = 5; - break; - case INSN_IMM_12: - mask = BIT(12) - 1; - shift = 10; - break; - case INSN_IMM_9: - mask = BIT(9) - 1; - shift = 12; - break; - default: - pr_err("encode_insn_immediate: unknown immediate encoding %d\n", - type); - return 0; + imm_type = AARCH64_INSN_IMM_MOVK; } - /* Update the immediate field. */ - insn &= ~(mask << shift); - insn |= (imm & mask) << shift; - - return insn; -} - -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_imm_type imm_type) -{ - u64 imm, limit = 0; - s64 sval; - u32 insn = *(u32 *)place; - - sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; - /* Update the instruction with the new encoding. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* Shift out the immediate field. */ sval >>= 16; @@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * For unsigned immediates, the overflow check is straightforward. * For signed immediates, the sign bit is actually the bit past the * most significant bit of the field. - * The INSN_IMM_16 immediate type is unsigned. + * The AARCH64_INSN_IMM_16 immediate type is unsigned. */ - if (imm_type != INSN_IMM_16) { + if (imm_type != AARCH64_INSN_IMM_16) { sval++; limit++; } @@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, int len, enum aarch64_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = *(u32 *)place; + u32 insn = le32_to_cpu(*(u32 *)place); /* Calculate the relocation value. */ sval = do_reloc(op, place, val); @@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, imm = sval & imm_mask; /* Update the instruction's immediate field. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and @@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; /* Immediate instruction relocations. */ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - INSN_IMM_14); + AARCH64_INSN_IMM_14); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - INSN_IMM_26); + AARCH64_INSN_IMM_26); break; default: diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594ff933..dfcd8fadde3c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@ #include <linux/bitmap.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/perf_event.h> @@ -363,26 +364,53 @@ validate_group(struct perf_event *event) } static void +armpmu_disable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + disable_percpu_irq(irq); +} + +static void armpmu_release_hardware(struct arm_pmu *armpmu) { - int i, irq, irqs; + int irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (!irqs) + return; - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) + return; + + if (irq_is_percpu(irq)) { + on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &cpu_hw_events); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq > 0) + free_irq(irq, armpmu); + } } } +static void +armpmu_enable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int i, err, irq, irqs; + int err, irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; if (!pmu_device) { @@ -391,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (!irqs) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) { + pr_err("failed to get valid irq for PMU device\n"); + return -ENODEV; + } - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, armpmu->handle_irq, + "arm-pmu", &cpu_hw_events); - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, - "arm-pmu", armpmu); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); + pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", + irq); armpmu_release_hardware(armpmu); return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq <= 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } } return 0; @@ -1299,8 +1347,8 @@ early_initcall(init_hw_perf_events); * Callchain handling code. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long lr; } __attribute__((packed)); /* @@ -1337,22 +1385,84 @@ user_backtrace(struct frame_tail __user *tail, return buftail.fp; } +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } perf_callchain_store(entry, regs->pc); - tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); +#endif + } } /* @@ -1380,6 +1490,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 000000000000..422ebd63b619 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + return 0; + + /* + * Compat (i.e. 32 bit) mode: + * - PC has been set in the pt_regs struct in kernel_entry, + * - Handle SP and LR here. + */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; + } + + return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (is_compat_thread(task_thread_info(task))) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 0860fc3077fc..a82af49bcc51 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include <stdarg.h> +#include <linux/compat.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -33,6 +34,7 @@ #include <linux/kallsyms.h> #include <linux/init.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -50,29 +52,18 @@ #include <asm/processor.h> #include <asm/stacktrace.h> -static void setup_restart(void) -{ - /* - * Tell the mm system that we are going to reboot - - * we may need it to insert some 1:1 mappings so that - * soft boot works. - */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn D-cache off */ - cpu_cache_off(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); -} +#ifdef CONFIG_CC_STACKPROTECTOR +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif void soft_restart(unsigned long addr) { - setup_restart(); - cpu_reset(addr); + setup_mm_for_reboot(); + cpu_soft_restart(virt_to_phys(cpu_reset), addr); + /* Should never get here */ + BUG(); } /* @@ -98,10 +89,19 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - cpu_do_idle(); - local_irq_enable(); + if (cpuidle_idle_call()) { + cpu_do_idle(); + local_irq_enable(); + } } +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + void machine_shutdown(void) { #ifdef CONFIG_SMP @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); @@ -297,7 +308,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. */ - dsb(); + dsb(ish); /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -308,6 +319,7 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -315,9 +327,11 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c445ff5..4d827dd62199 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -15,63 +15,98 @@ #define pr_fmt(fmt) "psci: " fmt +#include <linux/cpuidle.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> #include <asm/compiler.h> +#include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/system_misc.h> -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); +}; + +static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; -static u32 psci_function_id[PSCI_FN_MAX]; +static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state); -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 +static u32 psci_function_id[PSCI_FN_MAX]; static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +static void psci_power_state_unpack(u32 power_state, + struct psci_power_state *state) +{ + state->id = (power_state >> PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK; + state->type = (power_state >> PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK; + state->affinity_level = (power_state >> PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK; } /* @@ -108,6 +143,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -151,28 +194,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; -int __init psci_init(void) + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) { - struct device_node *np; - const char *method; - u32 id; - int err = 0; + int err; + u32 fn; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return -ENODEV; + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; - pr_info("probing function IDs from device-tree\n"); + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - err = -ENXIO; - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -180,11 +231,99 @@ int __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); - err = -EINVAL; + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(char str, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int __init psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } } + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int __init psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; psci_ops.cpu_suspend = psci_cpu_suspend; @@ -209,3 +348,136 @@ out_put_node: of_node_put(np); return err; } + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ + return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} +#endif +#endif + +#ifdef CONFIG_ARM64_CPU_SUSPEND +static int cpu_psci_cpu_suspend(unsigned long index) +{ + struct psci_power_state *state = __get_cpu_var(psci_power_state); + + if (!state) + return -EOPNOTSUPP; + + return psci_ops.cpu_suspend(state[index], virt_to_phys(cpu_resume)); +} +#endif + +const struct cpu_operations cpu_psci_ops = { + .name = "psci", +#ifdef CONFIG_SMP + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, +#endif +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + .cpu_suspend = cpu_psci_cpu_suspend, +#endif +}; + diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 33a74fc45959..5cafe0e40cfa 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -41,6 +42,9 @@ #include <asm/traps.h> #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -53,28 +57,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -657,28 +639,34 @@ static int compat_gpr_get(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; switch (idx) { case 15: - reg = (void *)&task_pt_regs(target)->pc; + reg = task_pt_regs(target)->pc; break; case 16: - reg = (void *)&task_pt_regs(target)->pstate; + reg = task_pt_regs(target)->pstate; break; case 17: - reg = (void *)&task_pt_regs(target)->orig_x0; + reg = task_pt_regs(target)->orig_x0; break; default: - reg = (void *)&task_pt_regs(target)->regs[idx]; + reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + if (kbuf) { + memcpy(kbuf, ®, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_to_user(ubuf, ®, sizeof(reg)); + if (ret) { + ret = -EFAULT; + break; + } - if (ret) - break; - else - ubuf += sizeof(compat_ulong_t); + ubuf += sizeof(reg); + } } return ret; @@ -706,28 +694,35 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + if (kbuf) { + memcpy(®, kbuf, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) { + ret = -EFAULT; + break; + } + + ubuf += sizeof(reg); + } switch (idx) { case 15: - reg = (void *)&newregs.pc; + newregs.pc = reg; break; case 16: - reg = (void *)&newregs.pstate; + newregs.pstate = reg; break; case 17: - reg = (void *)&newregs.orig_x0; + newregs.orig_x0 = reg; break; default: - reg = (void *)&newregs.regs[idx]; + newregs.regs[idx] = reg; } - ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - - if (ret) - goto out; - else - ubuf += sizeof(compat_ulong_t); } if (valid_user_regs(&newregs.user_regs)) @@ -735,7 +730,6 @@ static int compat_gpr_set(struct task_struct *target, else ret = -EINVAL; -out: return ret; } @@ -816,33 +810,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1114,45 +1081,49 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; - - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 000000000000..89102a6ffad5 --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7cc551d1b0e1..97676952b269 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -41,11 +41,14 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <linux/efi.h> #include <linux/personality.h> +#include <asm/fixmap.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/cputable.h> +#include <asm/cpu_ops.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -54,13 +57,25 @@ #include <asm/traps.h> #include <asm/memblock.h> #include <asm/psci.h> +#include <asm/efi.h> unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + static const char *cpu_name; static const char *machine_name; phys_addr_t __fdt_pointer __initdata; @@ -111,15 +126,95 @@ void cpuinfo_store_cpu(void) info->reg_midr = read_cpuid_id(); } -static void __init setup_processor(void) +void __init smp_setup_processor_id(void) { - struct cpu_info *cpu_info; + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc.S + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ + struct cpu_info *cpu_info; + u64 features, block; + cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { printk("CPU configuration botched (ID %08x), unable to continue.\n", @@ -132,8 +227,71 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. + */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -273,20 +431,27 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_ioremap_init(); + parse_early_param(); + efi_init(); arm64_memblock_init(); paging_init(); request_standard_resources(); + efi_idmap_init(); + unflatten_device_tree(); psci_init(); cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); + smp_build_mpidr_hash(); #endif #ifdef CONFIG_VT @@ -304,7 +469,7 @@ static int __init arm64_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -arch_initcall(arm64_device_init); +arch_initcall_sync(arm64_device_init); static int __init topology_init(void) { @@ -323,6 +488,12 @@ subsys_initcall(topology_init); static const char *hwcap_str[] = { "fp", "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; @@ -369,6 +540,7 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000UL/HZ), loops_per_jiffy / (5000UL/HZ) % 100); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75dd..e3cf09626245 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/personality.h> @@ -25,7 +26,6 @@ #include <linux/tracehook.h> #include <linux/ratelimit.h> -#include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/cacheflush.h> @@ -100,8 +100,7 @@ static int restore_sigframe(struct pt_regs *regs, { sigset_t set; int i, err; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -121,8 +120,11 @@ static int restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs); - if (err == 0) - err |= restore_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= restore_fpsimd_context(fpsimd_ctx); + } return err; } @@ -167,8 +169,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; + struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &sf->fp, err); @@ -185,12 +187,17 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) - err |= preserve_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= preserve_fpsimd_context(fpsimd_ctx); + aux += sizeof(*fpsimd_ctx); + } /* set the "end" magic */ - __put_user_error(0, &aux->end.magic, err); - __put_user_error(0, &aux->end.size, err); + end = aux; + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); return err; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 3d478102b1c0..96f545cfbb70 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -100,34 +100,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code. - * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; @@ -473,12 +445,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 000000000000..b1925729c692 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff2 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 // mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm +/* + * Save CPU state for a suspend. This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size. + */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. + * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + bl el2_setup // if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . +/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b0a8703a25ec..9a3c7ef182fd 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/cpu_ops.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -48,76 +49,34 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> +#define CREATE_TRACE_POINTS +#include <trace/events/arm-ipi.h> + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_TIMER, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. - */ -static void __cpuinit write_pen_release(u64 val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; - - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu_logical_map(cpu)); - - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); @@ -158,6 +117,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -167,8 +131,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); - /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -177,6 +139,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + printk("CPU%u: Booted secondary processor\n", cpu); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -187,17 +152,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(INVALID_HWID); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* - * Synchronise with the boot thread. + * Enable GIC and timers. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + notify_cpu_starting(cpu); + + smp_store_cpu_info(cpu); /* * Log the CPU info before it is marked online and might get read. @@ -212,11 +175,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - /* - * Enable GIC and timers. - */ - notify_cpu_starting(cpu); - + local_dbg_enable(); local_irq_enable(); local_fiq_enable(); @@ -226,42 +185,139 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_startup_entry(CPUHP_ONLINE); } -void __init smp_cpus_done(unsigned int max_cpus) +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) { - unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; } -void __init smp_prepare_boot_cpu(void) +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) { -} + unsigned int cpu = smp_processor_id(); + int ret; -static void (*smp_cross_call)(const struct cpumask *, unsigned int); + ret = op_cpu_disable(cpu); + if (ret) + return ret; -static const struct smp_enable_ops *enable_ops[] __initconst = { - &smp_spin_table_ops, - &smp_psci_ops, - NULL, -}; + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) + return 0; +} + +static int op_cpu_kill(unsigned int cpu) { - const struct smp_enable_ops **ops = enable_ops; + /* + * If we have no means of synchronising with the dying CPU, then assume + * that it is really dead. We can only wait for an arbitrary length of + * time and hope that it's dead, so let's skip the wait and just hope. + */ + if (!cpu_ops[cpu]->cpu_kill) + return 1; - while (*ops) { - if (!strcmp(name, (*ops)->name)) - return *ops; + return cpu_ops[cpu]->cpu_kill(cpu); +} + +static DECLARE_COMPLETION(cpu_died); - ops++; +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; } + pr_notice("CPU%u: shutdown\n", cpu); + + /* + * Now that the dying CPU is beyond the point of no return w.r.t. + * in-kernel synchronisation, try to get the firwmare to help us to + * verify that it has really left the kernel before we consider + * clobbering anything it might still be using. + */ + if (!op_cpu_kill(cpu)) + pr_warn("CPU%d may not have shut down cleanly\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. + */ + cpu_ops[cpu]->cpu_die(cpu); - return NULL; + BUG(); } +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ + unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + + pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); +} + +void __init smp_prepare_boot_cpu(void) +{ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); +} + +static void (*smp_cross_call)(const struct cpumask *, unsigned int); /* * Enumerate the possible CPU set from the device tree and build the @@ -270,9 +326,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; - int i, cpu = 1; + unsigned int i, cpu = 1; bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -341,25 +396,10 @@ void __init smp_init_cpus(void) if (cpu >= NR_CPUS) goto next; - /* - * We currently support only the "spin-table" enable-method. - */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - pr_err("%s: missing enable-method property\n", - dn->full_name); - goto next; - } - - smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); - - if (!smp_enable_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); + if (cpu_read_ops(dn, cpu) != 0) goto next; - } - if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) + if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); @@ -389,8 +429,12 @@ next: void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu, err; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); + + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); /* * are we trying to boot more cores than exist? @@ -417,10 +461,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!smp_enable_ops[cpu]) + if (!cpu_ops[cpu]) continue; - err = smp_enable_ops[cpu]->prepare_cpu(cpu); + err = cpu_ops[cpu]->cpu_prepare(cpu); if (err) continue; @@ -451,6 +495,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -460,7 +505,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); @@ -536,6 +581,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -548,6 +601,13 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c533301be77..000000000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/psci.h> -#include <asm/smp_plat.h> - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ - return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ - int err; - - if (!psci_ops.cpu_on) { - pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); - return -ENODEV; - } - - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - - return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { - .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, -}; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa682f76..0347d38eea29 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,15 +16,38 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/delay.h> #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/cputype.h> +#include <asm/smp_plat.h> + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + + +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { /* * Determine the address from which the CPU is polling. @@ -40,7 +63,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) return 0; } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu) { void **release_addr; @@ -48,7 +71,16 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return -ENODEV; release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); + + /* + * We write the release address as LE regardless of the native + * endianess of the kernel. Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianess before jumping. This is mandated by + * the boot protocol. + */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); /* @@ -59,8 +91,24 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + return 0; +} + +const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", - .init_cpu = smp_spin_table_init_cpu, - .prepare_cpu = smp_spin_table_prepare_cpu, + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 048334bb2651..407991bf79f5 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); - if (fp < low || fp > high || fp & 0xf) + if (fp < low || fp > high - 0x18 || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; @@ -111,10 +111,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } else { - register unsigned long current_sp asm("sp"); data.no_sched_functions = 0; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_sp; + frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 000000000000..1fa9ce4afd8f --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + * must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, + phys_addr_t *save_ptr) +{ + int cpu = smp_processor_id(); + + *save_ptr = virt_to_phys(ptr); + + cpu_do_suspend(ptr); + /* + * Only flush the context that must be retrieved with the MMU + * off. VA primitives ensure the flush is applied to all + * cache levels so context is pushed to DRAM. + */ + __flush_dcache_area(ptr, sizeof(*ptr)); + __flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ + /* Prevent multiple restore hook initializations */ + if (WARN_ON(hw_breakpoint_restore)) + return; + hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ + struct mm_struct *mm = current->active_mm; + int ret, cpu = smp_processor_id(); + unsigned long flags; + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + + /* + * From this point debug exceptions are disabled to prevent + * updates to mdscr register (saved and restored along with + * general purpose registers) from kernel debuggers. + */ + local_dbg_save(flags); + + /* + * mm context saved on the stack, it will be restored when + * the cpu comes out of reset through the identity mapped + * page tables, so that the thread address space is properly + * set-up on function return. + */ + ret = __cpu_suspend(arg); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); + } + + /* + * Restore pstate flags. OS lock and mdscr have been already + * restored, so from this point onwards, debugging is fully + * renabled if it was enabled when core started shutdown. + */ + local_dbg_restore(flags); + + return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ + void *ctx_ptr; + + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sleep_idmap_phys = virt_to_phys(idmap_pg_dir); + __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); + __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + + return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed7467c..423a5b3fc2be 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * extension. */ compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..1ddb2bd5932a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -18,6 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/clockchips.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/interrupt.h> @@ -68,18 +69,14 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; clocksource_of_init(); + tick_setup_hrtimer_broadcast(); + arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000000..db8bb29c3852 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,590 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/topology.h> +#include <asm/smp_plat.h> + + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { + of_node_put(cpu_node); + return cpu; + } + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + + of_node_put(cpu_node); + return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return -EINVAL; + } + + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int cluster_id __initdata; + int core_id = 0; + int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%s: cpu-map children should be clusters\n", + c->full_name); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, cluster_id, core_id++); + } else { + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; + + return 0; +} + +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + { "arm,cortex-a57", 3891 }, + { "arm,cortex-a53", 2048 }, + { NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. + */ + for_each_possible_cpu(cpu) { + if (cpu_topology[cpu].cluster_id == -1) { + pr_err("CPU%d: No topology information specified\n", + cpu); + ret = -EINVAL; + } + } + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + +static void __init parse_dt_cpu_power(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn; + unsigned long min_capacity = ULONG_MAX; + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int cpu; + + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + + /* Too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("Missing device node for CPU %d\n", cpu); + continue; + } + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) { + pr_warn("%s: Unknown CPU type\n", cn->full_name); + continue; + } + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s: Missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equal we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + return; + else if (4 * max_capacity < (3 * (max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; +} + +/* + * Look for a customed capacity of a CPU in the cpu_topo_data table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +static void update_cpu_power(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + pr_info("CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + if (cpuid_topo->cluster_id == -1) { + /* + * DT does not contain topology information for this cpu. + */ + pr_debug("CPU%u: No topology information configured\n", cpuid); + return; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +#ifdef CONFIG_SCHED_HMP + +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} + +static const char * const little_cores[] = { + "arm,cortex-a53", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) + return true; + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 8) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr+1)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr+1)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. + */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct cpumask hmp_fast_cpu_mask; + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. + */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) { + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + } + + return -EINVAL; +} + +void store_cpu_topology(unsigned int cpuid) +{ + update_siblings_masks(cpuid); + update_cpu_power(cpuid); +} + +static void __init reset_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->cluster_id = -1; + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + } +} + +static void __init reset_cpu_power(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + set_power_scale(cpu, SCHED_POWER_SCALE); +} + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (parse_dt_topology()) + reset_cpu_topology(); + + reset_cpu_power(); + parse_dt_cpu_power(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..12cd34ff35d0 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include <linux/syscalls.h> #include <asm/atomic.h> +#include <asm/debug-monitors.h> #include <asm/traps.h> #include <asm/stacktrace.h> #include <asm/exception.h> @@ -131,7 +132,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - const register unsigned long current_sp asm ("sp"); pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -144,7 +144,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = regs->pc; } else if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_sp; + frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; } else { /* @@ -155,7 +155,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = thread_saved_pc(tsk); } - printk("Call trace:\n"); + pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; int ret; @@ -261,11 +261,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { @@ -329,17 +327,17 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pte %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } void __init trap_init(void) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 0ea7a22bcdf2..84cafbc3eb54 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); @@ -103,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) static int __init vdso_init(void) { - struct page *pg; - char *vbase; - int i, ret = 0; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (vdso_pagelist == NULL) { - pr_err("Failed to allocate vDSO pagelist!\n"); + if (vdso_pagelist == NULL) return -ENOMEM; - } /* Grab the vDSO code pages. */ - for (i = 0; i < vdso_pages; i++) { - pg = virt_to_page(&vdso_start + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso_pagelist[i] = pg; - } - - /* Sanity check the shared object header. */ - vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); - if (vbase == NULL) { - pr_err("Failed to map vDSO pagelist!\n"); - return -ENOMEM; - } else if (memcmp(vbase, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - ret = -EINVAL; - goto unmap; - } + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Grab the vDSO data page. */ - pg = virt_to_page(vdso_data); - get_page(pg); - vdso_pagelist[i] = pg; + vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: - vunmap(vbase); - return ret; + return 0; } arch_initcall(vdso_init); @@ -153,11 +138,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_mapping_len; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; int ret; + vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); @@ -167,35 +153,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, } mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, + ret = install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); - if (ret) { - mm->context.vdso = NULL; + if (ret) + goto up_fail; + + vdso_base += vdso_text_len; + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_pagelist + vdso_pages); + if (ret) goto up_fail; - } -up_fail: up_write(&mm->mmap_sem); + return 0; +up_fail: + mm->context.vdso = NULL; + up_write(&mm->mmap_sem); return ret; } const char *arch_vma_name(struct vm_area_struct *vma) { + unsigned long vdso_text; + + if (!vma->vm_mm) + return NULL; + + vdso_text = (unsigned long)vma->vm_mm->context.vdso; + /* * We can re-use the vdso pointer in mm_context_t for identifying * the vectors page for compat applications. The vDSO will always * sit above TASK_UNMAPPED_BASE and so we don't need to worry about * it conflicting with the vectors base. */ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { + if (vma->vm_start == vdso_text) { #ifdef CONFIG_COMPAT if (vma->vm_start == AARCH32_VECTORS_BASE) return "[vectors]"; #endif return "[vdso]"; + } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { + return "[vvar]"; } return NULL; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 6d20b7d162d8..ff3bddea482d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -43,13 +43,13 @@ $(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S +$(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< # Install commands for the unstripped file diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..de93a4bbdb7c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -13,10 +13,23 @@ #define ARM_EXIT_DISCARD(x) x OUTPUT_ARCH(aarch64) -ENTRY(stext) +ENTRY(_text) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -41,7 +54,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -49,6 +61,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -56,7 +69,8 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) + NOTES _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -82,45 +96,29 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - _edata_loc = __data_loc + SIZEOF(.data); - NOTES + . = ALIGN(PAGE_SIZE); + _data = .; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; BSS_SECTION(0, 0, 0) + + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + swapper_pg_dir = .; + . += SWAPPER_DIR_SIZE; + _end = .; STABS_DEBUG .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") |