aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Shi <alex.shi@linaro.org>2017-06-28 22:13:59 +0800
committerAlex Shi <alex.shi@linaro.org>2017-06-28 22:13:59 +0800
commitd87abdcbf8871b5ee58083b3bdf216ba406224b9 (patch)
tree5a25ee77e6be9107364139f02025ff6d0e50ab98
parent428ee0d0d38f2cf5b1c4d584f7b2eee76d7a992e (diff)
parent89912966d1c4cf3c48f632fdc571232b5b0bdd63 (diff)
Merge branch 'lsk/kdump/for-v4.4' into linux-linaro-lsk-v4.4
-rw-r--r--Documentation/devicetree/bindings/chosen.txt45
-rw-r--r--Documentation/kdump/kdump.txt16
-rw-r--r--arch/arm64/Kconfig21
-rw-r--r--arch/arm64/configs/defconfig2
-rw-r--r--arch/arm64/include/asm/cacheflush.h1
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/kexec.h98
-rw-r--r--arch/arm64/include/asm/mmu.h2
-rw-r--r--arch/arm64/include/asm/smp.h52
-rw-r--r--arch/arm64/include/asm/virt.h5
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/cpu-reset.S54
-rw-r--r--arch/arm64/kernel/cpu-reset.h34
-rw-r--r--arch/arm64/kernel/cpufeature.c36
-rw-r--r--arch/arm64/kernel/crash_dump.c71
-rw-r--r--arch/arm64/kernel/efi.c2
-rw-r--r--arch/arm64/kernel/head.S34
-rw-r--r--arch/arm64/kernel/hibernate.c10
-rw-r--r--arch/arm64/kernel/hyp-stub.S10
-rw-r--r--arch/arm64/kernel/machine_kexec.c364
-rw-r--r--arch/arm64/kernel/relocate_kernel.S130
-rw-r--r--arch/arm64/kernel/setup.c7
-rw-r--r--arch/arm64/kernel/smp.c153
-rw-r--r--arch/arm64/mm/init.c181
-rw-r--r--arch/arm64/mm/mmu.c141
-rw-r--r--arch/arm64/mm/pageattr.c13
-rw-r--r--drivers/firmware/efi/libstub/fdt.c28
-rw-r--r--include/linux/memblock.h15
-rw-r--r--include/linux/mm.h12
-rw-r--r--include/uapi/linux/kexec.h1
-rw-r--r--mm/memblock.c115
-rw-r--r--mm/slab.c98
33 files changed, 1587 insertions, 172 deletions
diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt
index 6ae9d82d4c37..b5e39af4ddc0 100644
--- a/Documentation/devicetree/bindings/chosen.txt
+++ b/Documentation/devicetree/bindings/chosen.txt
@@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on
book3e) by some versions of kexec-tools to tell the new kernel that it
is being booted by kexec, as the booting environment may differ (e.g.
a different secondary CPU release mechanism)
+
+linux,usable-memory-range
+-------------------------
+
+This property (arm64 only) holds a base address and size, describing a
+limited region in which memory may be considered available for use by
+the kernel. Memory outside of this range is not available for use.
+
+This property describes a limitation: memory within this range is only
+valid when also described through another mechanism that the kernel
+would otherwise use to determine available memory (e.g. memory nodes
+or the EFI memory map). Valid memory may be sparse within the range.
+e.g.
+
+/ {
+ chosen {
+ linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>;
+ };
+};
+
+The main usage is for crash dump kernel to identify its own usable
+memory and exclude, at its boot time, any other memory areas that are
+part of the panicked kernel's memory.
+
+While this property does not represent a real hardware, the address
+and the size are expressed in #address-cells and #size-cells,
+respectively, of the root node.
+
+linux,elfcorehdr
+----------------
+
+This property (currently used only on arm64) holds the memory range,
+the address and the size, of the elf core header which mainly describes
+the panicked kernel's memory layout as PT_LOAD segments of elf format.
+e.g.
+
+/ {
+ chosen {
+ linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>;
+ };
+};
+
+While this property does not represent a real hardware, the address
+and the size are expressed in #address-cells and #size-cells,
+respectively, of the root node.
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index bc4bd5a44b88..b6fe6a885bf8 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
a remote system.
Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x and arm architectures.
+s390x, arm and arm64 architectures.
When the system kernel boots, it reserves a small section of memory for
the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm)
AUTO_ZRELADDR=y
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+ on non-VHE systems even if it is configured. This is because the CPU
+ will not be reset to EL2 on panic.
+
Extended crashkernel syntax
===========================
@@ -312,6 +319,8 @@ Boot into System Kernel
any space below the alignment point may be overwritten by the dump-capture kernel,
which means it is possible that the vmcore is not that precise as expected.
+ On arm64, use "crashkernel=Y[@X]". Note that the start address of
+ the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
Load the Dump-capture Kernel
============================
@@ -334,6 +343,8 @@ For s390x:
- Use image or bzImage
For arm:
- Use zImage
+For arm64:
+ - Use vmlinux or Image
If you are using a uncompressed vmlinux image then use following command
to load dump-capture kernel.
@@ -377,6 +388,9 @@ For s390x:
For arm:
"1 maxcpus=1 reset_devices"
+For arm64:
+ "1 maxcpus=1 reset_devices"
+
Notes on loading the dump-capture kernel:
* By default, the ELF headers are stored in ELF64 format to support
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6fba73b6fcc8..703b7f921806 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -598,6 +598,27 @@ config SECCOMP
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
+config KEXEC
+ depends on PM_SLEEP_SMP
+ select KEXEC_CORE
+ bool "kexec system call"
+ ---help---
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/kdump/kdump.txt
+
config XEN_DOM0
def_bool y
depends on XEN
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 79717faf2161..7dc5e58f7b7a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -58,6 +58,8 @@ CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 22dda613f9c9..22aabdeacc24 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -155,5 +155,6 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_valid(unsigned long addr, unsigned long size, int enable);
#endif
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 8740297dac77..1473fc2f7ab7 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 6
+#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 000000000000..e17f0529a882
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,98 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+#define KEXEC_ARCH KEXEC_ARCH_AARCH64
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+ if (oldregs) {
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ } else {
+ u64 tmp1, tmp2;
+
+ __asm__ __volatile__ (
+ "stp x0, x1, [%2, #16 * 0]\n"
+ "stp x2, x3, [%2, #16 * 1]\n"
+ "stp x4, x5, [%2, #16 * 2]\n"
+ "stp x6, x7, [%2, #16 * 3]\n"
+ "stp x8, x9, [%2, #16 * 4]\n"
+ "stp x10, x11, [%2, #16 * 5]\n"
+ "stp x12, x13, [%2, #16 * 6]\n"
+ "stp x14, x15, [%2, #16 * 7]\n"
+ "stp x16, x17, [%2, #16 * 8]\n"
+ "stp x18, x19, [%2, #16 * 9]\n"
+ "stp x20, x21, [%2, #16 * 10]\n"
+ "stp x22, x23, [%2, #16 * 11]\n"
+ "stp x24, x25, [%2, #16 * 12]\n"
+ "stp x26, x27, [%2, #16 * 13]\n"
+ "stp x28, x29, [%2, #16 * 14]\n"
+ "mov %0, sp\n"
+ "stp x30, %0, [%2, #16 * 15]\n"
+
+ "/* faked current PSTATE */\n"
+ "mrs %0, CurrentEL\n"
+ "mrs %1, SPSEL\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, DAIF\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, NZCV\n"
+ "orr %0, %0, %1\n"
+ /* pc */
+ "adr %1, 1f\n"
+ "1:\n"
+ "stp %1, %0, [%2, #16 * 16]\n"
+ : "=&r" (tmp1), "=&r" (tmp2)
+ : "r" (newregs)
+ : "memory"
+ );
+ }
+}
+
+#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+extern bool crash_is_nosave(unsigned long pfn);
+extern void crash_prepare_suspend(void);
+extern void crash_post_resume(void);
+#else
+static inline bool crash_is_nosave(unsigned long pfn) {return false; }
+static inline void crash_prepare_suspend(void) {}
+static inline void crash_post_resume(void) {}
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a67eeb..5472251c8e6c 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -33,7 +33,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot);
+ pgprot_t prot, bool allow_block_mappings);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
#endif
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2013a4dc5124..1d3ff7e4a6c2 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -16,6 +16,19 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+/* Values for secondary_data.status */
+
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
+/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
+#define CPU_KILL_ME (1)
+/* The cpu couldn't die gracefully and is looping in the kernel */
+#define CPU_STUCK_IN_KERNEL (2)
+/* Fatal system error detected by secondary CPU, crash the system */
+#define CPU_PANIC_KERNEL (3)
+
+#ifndef __ASSEMBLY__
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
@@ -54,11 +67,17 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
+ * @stack - sp for the secondary CPU
+ * @status - Result passed back from the secondary CPU to
+ * indicate failure.
*/
struct secondary_data {
void *stack;
+ long status;
};
+
extern struct secondary_data secondary_data;
+extern long __early_cpu_boot_status;
extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
@@ -77,5 +96,38 @@ extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void);
+extern void cpu_die_early(void);
+
+static inline void cpu_park_loop(void)
+{
+ for (;;) {
+ wfe();
+ wfi();
+ }
+}
+
+static inline void update_cpu_boot_status(int val)
+{
+ WRITE_ONCE(secondary_data.status, val);
+ /* Ensure the visibility of the status update */
+ dsb(ishst);
+}
+
+/*
+ * If a secondary CPU enters the kernel but fails to come online,
+ * (e.g. due to mismatched features), and cannot exit the kernel,
+ * we increment cpus_stuck_in_kernel and leave the CPU in a
+ * quiesecent loop within the kernel text. The memory containing
+ * this loop must not be re-used for anything else as the 'stuck'
+ * core is executing it.
+ *
+ * This function is used to inhibit features like kexec and hibernate.
+ */
+bool cpus_are_stuck_in_kernel(void);
+
+extern void smp_send_crash_stop(void);
+extern bool smp_crash_stop_failed(void);
+
+#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 06e6a5238c4c..e6c27b8ce311 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -34,6 +34,11 @@
*/
#define HVC_SET_VECTORS 1
+/*
+ * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine.
+ */
+#define HVC_SOFT_RESTART 2
+
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 20bcc2db06bf..96ae961ada72 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -44,7 +44,9 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
-arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
+arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
+ cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 2bb17bd556f8..a506ddc270c0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -117,6 +117,8 @@ int main(void)
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
+ DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
new file mode 100644
index 000000000000..65f42d257414
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -0,0 +1,54 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection .idmap.text, "ax"
+
+/*
+ * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
+ * cpu_soft_restart.
+ *
+ * @el2_switch: Flag to indicate a swich to EL2 is needed.
+ * @entry: Location to jump to for soft reset.
+ * arg0: First argument passed to @entry.
+ * arg1: Second argument passed to @entry.
+ * arg2: Third argument passed to @entry.
+ *
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
+ */
+ENTRY(__cpu_soft_restart)
+ /* Clear sctlr_el1 flags. */
+ mrs x12, sctlr_el1
+ ldr x13, =SCTLR_ELx_FLAGS
+ bic x12, x12, x13
+ msr sctlr_el1, x12
+ isb
+
+ cbz x0, 1f // el2_switch?
+ mov x0, #HVC_SOFT_RESTART
+ hvc #0 // no return
+
+1: mov x18, x1 // entry
+ mov x0, x2 // arg0
+ mov x1, x3 // arg1
+ mov x2, x4 // arg2
+ br x18
+ENDPROC(__cpu_soft_restart)
+
+.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
new file mode 100644
index 000000000000..d4e9ecb264f0
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -0,0 +1,34 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_CPU_RESET_H
+#define _ARM64_CPU_RESET_H
+
+#include <asm/virt.h>
+
+void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2);
+
+static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
+ unsigned long entry, unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
+{
+ typeof(__cpu_soft_restart) *restart;
+
+ el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
+ is_hyp_mode_available();
+ restart = (void *)virt_to_phys(__cpu_soft_restart);
+
+ cpu_install_idmap();
+ restart(el2_switch, entry, arg0, arg1, arg2);
+ unreachable();
+}
+
+#endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index eda7d5915fbb..476fb5caa361 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -894,28 +894,6 @@ static u64 __raw_read_system_reg(u32 sys_id)
}
/*
- * Park the CPU which doesn't have the capability as advertised
- * by the system.
- */
-static void fail_incapable_cpu(char *cap_type,
- const struct arm64_cpu_capabilities *cap)
-{
- int cpu = smp_processor_id();
-
- pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
- /* Mark this CPU absent */
- set_cpu_present(cpu, 0);
-
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
- asm(
- "1: wfe\n"
- " wfi\n"
- " b 1b");
-}
-
-/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
* Any new CPU should match the system wide status of the capability. If the
@@ -943,8 +921,11 @@ void verify_local_cpu_capabilities(void)
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_features", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing feature: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
if (caps[i].enable)
caps[i].enable(NULL);
}
@@ -952,8 +933,11 @@ void verify_local_cpu_capabilities(void)
for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
if (!cpus_have_hwcap(&caps[i]))
continue;
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing HWCAP: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
}
}
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 000000000000..f46d57c31443
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset,
+ int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+ memunmap(vaddr);
+ return -EFAULT;
+ }
+ } else {
+ memcpy(buf, vaddr + offset, csize);
+ }
+
+ memunmap(vaddr);
+
+ return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @csize: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+ return count;
+}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 4eeb17198cfa..40270c1fa947 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -264,7 +264,7 @@ static bool __init efi_virtmap_init(void)
create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | PTE_NG));
+ __pgprot(pgprot_val(prot) | PTE_NG), true);
}
return true;
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0ea9a97467a6..6c5595928169 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -36,6 +36,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -643,7 +644,8 @@ __secondary_switched:
msr vbar_el1, x5
isb
- ldr_l x0, secondary_data // get secondary_data.stack
+ adr_l x0, secondary_data
+ ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0
and x0, x0, #~(THREAD_SIZE - 1)
msr sp_el0, x0 // save thread_info
@@ -652,6 +654,29 @@ __secondary_switched:
ENDPROC(__secondary_switched)
/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ *
+ * update_early_cpu_boot_status tmp, status
+ * - Corrupts tmp1, tmp2
+ * - Writes 'status' to __early_cpu_boot_status and makes sure
+ * it is committed to memory.
+ */
+
+ .macro update_early_cpu_boot_status status, tmp1, tmp2
+ mov \tmp2, #\status
+ str_l \tmp2, __early_cpu_boot_status, \tmp1
+ dmb sy
+ dc ivac, \tmp1 // Invalidate potentially stale cache line
+ .endm
+
+ .pushsection .data..cacheline_aligned
+ .align L1_CACHE_SHIFT
+ENTRY(__early_cpu_boot_status)
+ .long 0
+ .popsection
+
+/*
* Enable the MMU.
*
* x0 = SCTLR_EL1 value for turning on the MMU.
@@ -669,6 +694,7 @@ ENTRY(__enable_mmu)
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
+ update_early_cpu_boot_status 0, x1, x2
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
@@ -708,8 +734,12 @@ ENTRY(__enable_mmu)
ENDPROC(__enable_mmu)
__no_granule_support:
+ /* Indicate that this CPU can't boot and is stuck in the kernel */
+ update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+1:
wfe
- b __no_granule_support
+ wfi
+ b 1b
ENDPROC(__no_granule_support)
__primary_switch:
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 6dd18140ebb8..35a33d705536 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -27,6 +27,7 @@
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/irqflags.h>
+#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
@@ -100,7 +101,8 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
- return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+ return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
+ crash_is_nosave(pfn);
}
void notrace save_processor_state(void)
@@ -250,11 +252,17 @@ int swsusp_arch_suspend(void)
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
+ /* make the crash dump kernel image visible/saveable */
+ crash_prepare_suspend();
+
ret = swsusp_save();
} else {
/* Clean kernel to PoC for secondary core startup */
__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+ /* make the crash dump kernel image protected again */
+ crash_post_resume();
+
/*
* Tell the hibernation core that we've just restored
* the memory
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 8727f4490772..d3b5f75e652e 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -71,8 +71,16 @@ el1_sync:
msr vbar_el2, x1
b 9f
+2: cmp x0, #HVC_SOFT_RESTART
+ b.ne 3f
+ mov x0, x2
+ mov x2, x4
+ mov x4, x1
+ mov x1, x3
+ br x4 // no return
+
/* Someone called kvm_call_hyp() against the hyp-stub... */
-2: mov x0, #ARM_EXCEPTION_HYP_GONE
+3: mov x0, #ARM_EXCEPTION_HYP_GONE
9: eret
ENDPROC(el1_sync)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..481f54a866c5
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,364 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/page-flags.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+/**
+ * kexec_image_info - For debugging output.
+ */
+#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
+static void _kexec_image_info(const char *func, int line,
+ const struct kimage *kimage)
+{
+ unsigned long i;
+
+ pr_debug("%s:%d:\n", func, line);
+ pr_debug(" kexec kimage info:\n");
+ pr_debug(" type: %d\n", kimage->type);
+ pr_debug(" start: %lx\n", kimage->start);
+ pr_debug(" head: %lx\n", kimage->head);
+ pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
+
+ for (i = 0; i < kimage->nr_segments; i++) {
+ pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+ i,
+ kimage->segment[i].mem,
+ kimage->segment[i].mem + kimage->segment[i].memsz,
+ kimage->segment[i].memsz,
+ kimage->segment[i].memsz / PAGE_SIZE);
+ }
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+ /* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
+ * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+ kexec_image_info(kimage);
+
+ if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
+ pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
+ */
+static void kexec_list_flush(struct kimage *kimage)
+{
+ kimage_entry_t *entry;
+
+ for (entry = &kimage->head; ; entry++) {
+ unsigned int flag;
+ void *addr;
+
+ /* flush the list entries. */
+ __flush_dcache_area(entry, sizeof(kimage_entry_t));
+
+ flag = *entry & IND_FLAGS;
+ if (flag == IND_DONE)
+ break;
+
+ addr = phys_to_virt(*entry & PAGE_MASK);
+
+ switch (flag) {
+ case IND_INDIRECTION:
+ /* Set entry point just before the new list page. */
+ entry = (kimage_entry_t *)addr - 1;
+ break;
+ case IND_SOURCE:
+ /* flush the source pages. */
+ __flush_dcache_area(addr, PAGE_SIZE);
+ break;
+ case IND_DESTINATION:
+ break;
+ default:
+ BUG();
+ }
+ }
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+ unsigned long i;
+
+ pr_debug("%s:\n", __func__);
+
+ for (i = 0; i < kimage->nr_segments; i++) {
+ pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+ i,
+ kimage->segment[i].mem,
+ kimage->segment[i].mem + kimage->segment[i].memsz,
+ kimage->segment[i].memsz,
+ kimage->segment[i].memsz / PAGE_SIZE);
+
+ __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+ kimage->segment[i].memsz);
+ }
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+ phys_addr_t reboot_code_buffer_phys;
+ void *reboot_code_buffer;
+ bool in_kexec_crash = (kimage == kexec_crash_image);
+ bool stuck_cpus = cpus_are_stuck_in_kernel();
+
+ /*
+ * New cpus may have become stuck_in_kernel after we loaded the image.
+ */
+ BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
+ WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
+ "Some CPUs may be stale, kdump will be unreliable.\n");
+
+ reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+ reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+ kexec_image_info(kimage);
+
+ pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
+ kimage->control_code_page);
+ pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
+ &reboot_code_buffer_phys);
+ pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
+ reboot_code_buffer);
+ pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
+ arm64_relocate_new_kernel);
+ pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
+ __func__, __LINE__, arm64_relocate_new_kernel_size,
+ arm64_relocate_new_kernel_size);
+
+ /*
+ * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+ * after the kernel is shut down.
+ */
+ memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+ arm64_relocate_new_kernel_size);
+
+ /* Flush the reboot_code_buffer in preparation for its execution. */
+ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+ flush_icache_range((uintptr_t)reboot_code_buffer,
+ arm64_relocate_new_kernel_size);
+
+ /* Flush the kimage list and its buffers. */
+ kexec_list_flush(kimage);
+
+ /* Flush the new image if already in place. */
+ if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
+ kexec_segment_flush(kimage);
+
+ pr_info("Bye!\n");
+
+ /* Disable all DAIF exceptions. */
+ asm volatile ("msr daifset, #0xf" : : : "memory");
+
+ /*
+ * cpu_soft_restart will shutdown the MMU, disable data caches, then
+ * transfer control to the reboot_code_buffer which contains a copy of
+ * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
+ * uses physical addressing to relocate the new image to its final
+ * position and transfers control to the image entry point when the
+ * relocation is complete.
+ */
+
+ cpu_soft_restart(kimage != kexec_crash_image,
+ reboot_code_buffer_phys, kimage->head, kimage->start, 0);
+
+ BUG(); /* Should never get here. */
+}
+
+static void machine_kexec_mask_interrupts(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int ret;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ /*
+ * First try to remove the active state. If this
+ * fails, try to EOI the interrupt.
+ */
+ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+ chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ local_irq_disable();
+
+ /* shutdown non-crashing cpus */
+ smp_send_crash_stop();
+
+ /* for crashing cpu */
+ crash_save_cpu(regs, smp_processor_id());
+ machine_kexec_mask_interrupts();
+
+ pr_info("Starting crashdump kernel...\n");
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+ int i;
+
+ kexec_segment_flush(kexec_crash_image);
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ int i;
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
+
+#ifdef CONFIG_HIBERNATION
+/*
+ * To preserve the crash dump kernel image, the relevant memory segments
+ * should be mapped again around the hibernation.
+ */
+void crash_prepare_suspend(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+}
+
+void crash_post_resume(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_protect_crashkres();
+}
+
+/*
+ * crash_is_nosave
+ *
+ * Return true only if a page is part of reserved memory for crash dump kernel,
+ * but does not hold any data of loaded kernel image.
+ *
+ * Note that all the pages in crash dump kernel memory have been initially
+ * marked as Reserved in kexec_reserve_crashkres_pages().
+ *
+ * In hibernation, the pages which are Reserved and yet "nosave" are excluded
+ * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * dump kernel and will reduce the total size of hibernation image.
+ */
+
+bool crash_is_nosave(unsigned long pfn)
+{
+ int i;
+ phys_addr_t addr;
+
+ if (!crashk_res.end)
+ return false;
+
+ /* in reserved memory? */
+ addr = __pfn_to_phys(pfn);
+ if ((addr < crashk_res.start) || (crashk_res.end < addr))
+ return false;
+
+ if (!kexec_crash_image)
+ return true;
+
+ /* not part of loaded kernel image? */
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ if (addr >= kexec_crash_image->segment[i].mem &&
+ addr < (kexec_crash_image->segment[i].mem +
+ kexec_crash_image->segment[i].memsz))
+ return false;
+
+ return true;
+}
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ page = phys_to_page(addr);
+ ClearPageReserved(page);
+ free_reserved_page(page);
+ }
+}
+#endif /* CONFIG_HIBERNATION */
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_NUMBER(VA_BITS);
+ /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+ vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
+ kimage_voffset);
+ vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
+ PHYS_OFFSET);
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..51b73cdde287
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,130 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when coping the
+ * new image to its final location. To assure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+ENTRY(arm64_relocate_new_kernel)
+
+ /* Setup the list loop variables. */
+ mov x17, x1 /* x17 = kimage_start */
+ mov x16, x0 /* x16 = kimage_head */
+ dcache_line_size x15, x0 /* x15 = dcache line size */
+ mov x14, xzr /* x14 = entry ptr */
+ mov x13, xzr /* x13 = copy dest */
+
+ /* Clear the sctlr_el2 flags. */
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL2
+ b.ne 1f
+ mrs x0, sctlr_el2
+ ldr x1, =SCTLR_ELx_FLAGS
+ bic x0, x0, x1
+ msr sctlr_el2, x0
+ isb
+1:
+
+ /* Check if the new image needs relocation. */
+ tbnz x16, IND_DONE_BIT, .Ldone
+
+.Lloop:
+ and x12, x16, PAGE_MASK /* x12 = addr */
+
+ /* Test the entry flags. */
+.Ltest_source:
+ tbz x16, IND_SOURCE_BIT, .Ltest_indirection
+
+ /* Invalidate dest page to PoC. */
+ mov x0, x13
+ add x20, x0, #PAGE_SIZE
+ sub x1, x15, #1
+ bic x0, x0, x1
+2: dc ivac, x0
+ add x0, x0, x15
+ cmp x0, x20
+ b.lo 2b
+ dsb sy
+
+ mov x20, x13
+ mov x21, x12
+ copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
+
+ /* dest += PAGE_SIZE */
+ add x13, x13, PAGE_SIZE
+ b .Lnext
+
+.Ltest_indirection:
+ tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
+
+ /* ptr = addr */
+ mov x14, x12
+ b .Lnext
+
+.Ltest_destination:
+ tbz x16, IND_DESTINATION_BIT, .Lnext
+
+ /* dest = addr */
+ mov x13, x12
+
+.Lnext:
+ /* entry = *ptr++ */
+ ldr x16, [x14], #8
+
+ /* while (!(entry & DONE)) */
+ tbz x16, IND_DONE_BIT, .Lloop
+
+.Ldone:
+ /* wait for writes from copy_page to finish */
+ dsb nsh
+ ic iallu
+ dsb nsh
+ isb
+
+ /* Start new image. */
+ mov x0, xzr
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+ br x17
+
+ENDPROC(arm64_relocate_new_kernel)
+
+.ltorg
+
+.align 3 /* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+ .quad .Lcopy_end - arm64_relocate_new_kernel
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 1e33d967c0ae..566f3b9b1588 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
-#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
@@ -220,6 +219,12 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, &kernel_data);
+#ifdef CONFIG_KEXEC_CORE
+ /* Userspace will find "Crash kernel" region in /proc/iomem. */
+ if (crashk_res.end && crashk_res.start >= res->start &&
+ crashk_res.end <= res->end)
+ request_resource(res, &crashk_res);
+#endif
}
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a84623d91410..a1d06fc42048 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
+#include <linux/kexec.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -63,16 +64,29 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP
};
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+ return -ENOSYS;
+}
+#endif
+
+
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
@@ -90,12 +104,14 @@ static DECLARE_COMPLETION(cpu_running);
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
+ long status;
/*
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+ update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
/*
@@ -119,6 +135,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
secondary_data.stack = NULL;
+ status = READ_ONCE(secondary_data.status);
+ if (ret && status) {
+
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
+
+ switch (status) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
+ break;
+ }
+ /* Fall through */
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
+ }
+ }
return ret;
}
@@ -184,6 +226,9 @@ asmlinkage void secondary_start_kernel(void)
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
+ update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ /* Make sure the status update is visible before we complete */
+ smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -311,6 +356,30 @@ void cpu_die(void)
}
#endif
+/*
+ * Kill the calling secondary CPU, early in bringup before it is turned
+ * online.
+ */
+void cpu_die_early(void)
+{
+ int cpu = smp_processor_id();
+
+ pr_crit("CPU%d: will not boot\n", cpu);
+
+ /* Mark this CPU absent */
+ set_cpu_present(cpu, 0);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ update_cpu_boot_status(CPU_KILL_ME);
+ /* Check if we can park ourselves */
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+ update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+
+ cpu_park_loop();
+}
+
static void __init hyp_mode_check(void)
{
if (is_hyp_mode_available())
@@ -634,6 +703,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
S(IPI_WAKEUP, "CPU wake-up interrupts"),
@@ -718,6 +788,29 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+#endif
+
+static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+#ifdef CONFIG_KEXEC_CORE
+ crash_save_cpu(regs, cpu);
+
+ atomic_dec(&waiting_for_crash_ipi);
+
+ local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+
+ /* just in case */
+ cpu_park_loop();
+#endif
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -748,6 +841,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_CPU_CRASH_STOP:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ irq_enter();
+ ipi_cpu_crash_stop(cpu, regs);
+
+ unreachable();
+ }
+ break;
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
irq_enter();
@@ -816,6 +918,39 @@ void smp_send_stop(void)
pr_warning("SMP: failed to stop secondary CPUs\n");
}
+#ifdef CONFIG_KEXEC_CORE
+void smp_send_crash_stop(void)
+{
+ cpumask_t mask;
+ unsigned long timeout;
+
+ if (num_online_cpus() == 1)
+ return;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+
+ pr_crit("SMP: stopping secondary CPUs\n");
+ smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
+
+ /* Wait up to one second for other CPUs to stop */
+ timeout = USEC_PER_SEC;
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+ udelay(1);
+
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+ return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
/*
* not supported here
*/
@@ -823,3 +958,21 @@ int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
+
+static bool have_cpu_die(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int any_cpu = raw_smp_processor_id();
+
+ if (cpu_ops[any_cpu]->cpu_die)
+ return true;
+#endif
+ return false;
+}
+
+bool cpus_are_stuck_in_kernel(void)
+{
+ bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
+
+ return !!cpus_stuck_in_kernel || smp_spin_tables;
+}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 75728047b60b..b2adff43bdaa 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -29,11 +29,14 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -75,6 +78,142 @@ static int __init early_initrd(char *p)
early_param("initrd", early_initrd);
#endif
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long crash_base, crash_size;
+ int ret;
+
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ /* no crashkernel= or invalid value specified */
+ if (ret || !crash_size)
+ return;
+
+ crash_size = PAGE_ALIGN(crash_size);
+
+ if (crash_base == 0) {
+ /* Current arm64 boot protocol requires 2MB alignment */
+ crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+ crash_size, SZ_2M);
+ if (crash_base == 0) {
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+ crash_size);
+ return;
+ }
+ } else {
+ /* User specifies base address explicitly. */
+ if (!memblock_is_region_memory(crash_base, crash_size)) {
+ pr_warn("cannot reserve crashkernel: region is not memory\n");
+ return;
+ }
+
+ if (memblock_is_region_reserved(crash_base, crash_size)) {
+ pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
+ return;
+ }
+
+ if (!IS_ALIGNED(crash_base, SZ_2M)) {
+ pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
+ return;
+ }
+ }
+ memblock_reserve(crash_base, crash_size);
+
+ pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+#ifdef CONFIG_HIBERNATION
+ phys_addr_t addr;
+ struct page *page;
+
+ if (!crashk_res.end)
+ return;
+
+ /*
+ * To reduce the size of hibernation image, all the pages are
+ * marked as Reserved initially.
+ */
+ for (addr = crashk_res.start; addr < (crashk_res.end + 1);
+ addr += PAGE_SIZE) {
+ page = phys_to_page(addr);
+ SetPageReserved(page);
+ }
+#endif
+}
+#else
+static void __init reserve_crashkernel(void)
+{
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_DUMP
+static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ const __be32 *reg;
+ int len;
+
+ if (depth != 1 || strcmp(uname, "chosen") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
+ if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+ return 1;
+
+ elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
+ elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ return 1;
+}
+
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * This function reserves the memory occupied by an elf core header
+ * described in the device tree. This region contains all the
+ * information about primary kernel's core image and is used by a dump
+ * capture kernel to access the system memory on primary kernel.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+ of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
+
+ if (!elfcorehdr_size)
+ return;
+
+ if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+ pr_warn("elfcorehdr is overlapped\n");
+ return;
+ }
+
+ memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+ pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
+ elfcorehdr_size >> 10, elfcorehdr_addr);
+}
+#else
+static void __init reserve_elfcorehdr(void)
+{
+}
+#endif /* CONFIG_CRASH_DUMP */
/*
* Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
* currently assumes that for memory starting above 4G, 32-bit devices will
@@ -166,10 +305,45 @@ static int __init early_mem(char *p)
}
early_param("mem", early_mem);
+static int __init early_init_dt_scan_usablemem(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ struct memblock_region *usablemem = data;
+ const __be32 *reg;
+ int len;
+
+ if (depth != 1 || strcmp(uname, "chosen") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
+ if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+ return 1;
+
+ usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+ usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ return 1;
+}
+
+static void __init fdt_enforce_memory_region(void)
+{
+ struct memblock_region reg = {
+ .size = 0,
+ };
+
+ of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
+
+ if (reg.size)
+ memblock_cap_memory_range(reg.base, reg.size);
+}
+
void __init arm64_memblock_init(void)
{
const s64 linear_region_size = -(s64)PAGE_OFFSET;
+ /* Handle linux,usable-memory-range property */
+ fdt_enforce_memory_region();
+
/*
* Ensure that the linear region takes up exactly half of the kernel
* virtual address space. This way, we can distinguish a linear address
@@ -242,6 +416,11 @@ void __init arm64_memblock_init(void)
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
+
+ reserve_crashkernel();
+
+ reserve_elfcorehdr();
+
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -355,6 +534,8 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
free_all_bootmem();
+ kexec_reserve_crashkres_pages();
+
mem_init_print_info(NULL);
#define MLK(b, t) b, t, ((t) - (b)) >> 10
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8fc302d84e1f..d23342804230 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -21,6 +21,8 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
@@ -156,29 +158,10 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
} while (pmd++, i++, i < PTRS_PER_PMD);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-
- /*
- * If debug_page_alloc is enabled we must map the linear map
- * using pages. However, other mappings created by
- * create_mapping_noalloc must use sections in some cases. Allow
- * sections to be used in those cases, where no pgtable_alloc
- * function is provided.
- */
- return !pgtable_alloc || !debug_pagealloc_enabled();
-}
-#else
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
- return true;
-}
-#endif
-
static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void))
+ phys_addr_t (*pgtable_alloc)(void),
+ bool allow_block_mappings)
{
pmd_t *pmd;
unsigned long next;
@@ -209,7 +192,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
- block_mappings_allowed(pgtable_alloc)) {
+ (!pgtable_alloc || allow_block_mappings)) {
pmd_t old_pmd =*pmd;
pmd_set_huge(pmd, phys, prot);
/*
@@ -248,7 +231,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void))
+ phys_addr_t (*pgtable_alloc)(void),
+ bool allow_block_mappings)
{
pud_t *pud;
unsigned long next;
@@ -269,7 +253,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
* For 4K granule only, attempt to put down a 1GB block
*/
if (use_1G_block(addr, next, phys) &&
- block_mappings_allowed(pgtable_alloc)) {
+ (!pgtable_alloc || allow_block_mappings)) {
pud_t old_pud = *pud;
pud_set_huge(pud, phys, prot);
@@ -290,7 +274,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
}
} else {
alloc_init_pmd(pud, addr, next, phys, prot,
- pgtable_alloc);
+ pgtable_alloc, allow_block_mappings);
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@@ -304,7 +288,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void))
+ phys_addr_t (*pgtable_alloc)(void),
+ bool allow_block_mappings)
{
unsigned long addr, length, end, next;
@@ -322,7 +307,8 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
+ alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+ (!pgtable_alloc || allow_block_mappings));
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
@@ -340,9 +326,11 @@ static phys_addr_t late_pgtable_alloc(void)
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*alloc)(void))
+ phys_addr_t (*alloc)(void),
+ bool allow_block_mappings)
{
- init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+ init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc,
+ allow_block_mappings);
}
/*
@@ -358,16 +346,15 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
- NULL);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot)
+ pgprot_t prot, bool allow_block_mappings)
{
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- late_pgtable_alloc);
+ late_pgtable_alloc, allow_block_mappings);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -380,41 +367,49 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
- late_pgtable_alloc);
+ late_pgtable_alloc, !debug_pagealloc_enabled());
+}
+
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+ phys_addr_t end, pgprot_t prot,
+ bool allow_block_mappings)
+{
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+ prot, early_pgtable_alloc, allow_block_mappings);
}
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
+static void __init map_mem(pgd_t *pgd)
{
unsigned long kernel_start = __pa(_text);
unsigned long kernel_end = __pa(_etext);
+ struct memblock_region *reg;
/*
* Take care not to create a writable alias for the
* read-only text and rodata sections of the kernel image.
+ * So temporarily mark them as NOMAP to skip mappings in
+ * the following for-loop
*/
+ memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+#endif
- /* No overlap with the kernel text */
- if (end < kernel_start || start >= kernel_end) {
- __create_pgd_mapping(pgd, start, __phys_to_virt(start),
- end - start, PAGE_KERNEL,
- early_pgtable_alloc);
- return;
- }
+ /* map all the memory banks */
+ for_each_memblock(memory, reg) {
+ phys_addr_t start = reg->base;
+ phys_addr_t end = start + reg->size;
- /*
- * This block overlaps the kernel text mapping.
- * Map the portion(s) which don't overlap.
- */
- if (start < kernel_start)
- __create_pgd_mapping(pgd, start,
- __phys_to_virt(start),
- kernel_start - start, PAGE_KERNEL,
- early_pgtable_alloc);
- if (kernel_end < end)
- __create_pgd_mapping(pgd, kernel_end,
- __phys_to_virt(kernel_end),
- end - kernel_end, PAGE_KERNEL,
- early_pgtable_alloc);
+ if (start >= end)
+ break;
+ if (memblock_is_nomap(reg))
+ continue;
+
+ __map_memblock(pgd, start, end,
+ PAGE_KERNEL, !debug_pagealloc_enabled());
+ }
/*
* Map the linear alias of the [_text, _etext) interval as
@@ -422,25 +417,23 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
* region accessible to subsystems such as hibernate, but
* protects it from inadvertent modification or execution.
*/
- __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
- kernel_end - kernel_start, PAGE_KERNEL_RO,
- early_pgtable_alloc);
-}
-
-static void __init map_mem(pgd_t *pgd)
-{
- struct memblock_region *reg;
+ __map_memblock(pgd, kernel_start, kernel_end,
+ PAGE_KERNEL_RO, !debug_pagealloc_enabled());
+ memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
- /* map all the memory banks */
- for_each_memblock(memory, reg) {
- phys_addr_t start = reg->base;
- phys_addr_t end = start + reg->size;
-
- if (start >= end)
- break;
-
- __map_memblock(pgd, start, end);
+#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * in page granularity and put back unused memory to buddy system
+ * through /sys/kernel/kexec_crash_size interface.
+ */
+ if (crashk_res.end) {
+ __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+ PAGE_KERNEL, false);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
}
+#endif
}
void mark_rodata_ro(void)
@@ -479,7 +472,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
- early_pgtable_alloc);
+ early_pgtable_alloc, !debug_pagealloc_enabled());
vma->addr = va_start;
vma->phys_addr = pa_start;
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index ca6d268e3313..f4e39dbdd36b 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -125,6 +125,19 @@ int set_memory_x(unsigned long addr, int numpages)
}
EXPORT_SYMBOL_GPL(set_memory_x);
+
+int set_memory_valid(unsigned long addr, int numpages, int enable)
+{
+ if (enable)
+ return __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(PTE_VALID),
+ __pgprot(0));
+ else
+ return __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(0),
+ __pgprot(PTE_VALID));
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index b1c22cf18f7d..9ee9973b5c3e 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,6 +16,22 @@
#include "efistub.h"
+#define EFI_DT_ADDR_CELLS_DEFAULT 2
+#define EFI_DT_SIZE_CELLS_DEFAULT 2
+
+static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
+{
+ int offset;
+
+ offset = fdt_path_offset(fdt, "/");
+ /* Set the #address-cells and #size-cells values for an empty tree */
+
+ fdt_setprop_u32(fdt, offset, "#address-cells",
+ EFI_DT_ADDR_CELLS_DEFAULT);
+
+ fdt_setprop_u32(fdt, offset, "#size-cells", EFI_DT_SIZE_CELLS_DEFAULT);
+}
+
efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
unsigned long orig_fdt_size,
void *fdt, int new_fdt_size, char *cmdline_ptr,
@@ -45,10 +61,18 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
}
}
- if (orig_fdt)
+ if (orig_fdt) {
status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
- else
+ } else {
status = fdt_create_empty_tree(fdt, new_fdt_size);
+ if (status == 0) {
+ /*
+ * Any failure from the following function is non
+ * critical
+ */
+ fdt_update_cell_size(sys_table, fdt);
+ }
+ }
if (status != 0)
goto fdt_set_fail;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 76b502c6258f..e3d461f19cbc 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -25,6 +25,7 @@ enum {
MEMBLOCK_NONE = 0x0, /* No special request */
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
+ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
};
struct memblock_region {
@@ -82,6 +83,8 @@ bool memblock_overlaps_region(struct memblock_type *type,
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
+int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
ulong choose_memblock_flags(void);
/* Low level functions */
@@ -184,6 +187,11 @@ static inline bool memblock_is_mirror(struct memblock_region *m)
return m->flags & MEMBLOCK_MIRROR;
}
+static inline bool memblock_is_nomap(struct memblock_region *m)
+{
+ return m->flags & MEMBLOCK_NOMAP;
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
@@ -318,9 +326,12 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
-int memblock_is_memory(phys_addr_t addr);
+void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
+void memblock_mem_limit_remove_map(phys_addr_t limit);
+bool memblock_is_memory(phys_addr_t addr);
+int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
-int memblock_is_reserved(phys_addr_t addr);
+bool memblock_is_reserved(phys_addr_t addr);
bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
extern void __memblock_dump_all(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0ffa01c90d9..b93d147439f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2148,14 +2148,18 @@ kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
-#else
+#endif /* CONFIG_HIBERNATION */
+#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
#ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
-#endif
+#endif /* CONFIG_HIBERNATION */
+static inline bool debug_pagealloc_enabled(void)
+{
+ return false;
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e501b88..aae5ebf2022b 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@
#define KEXEC_ARCH_SH (42 << 16)
#define KEXEC_ARCH_MIPS_LE (10 << 16)
#define KEXEC_ARCH_MIPS ( 8 << 16)
+#define KEXEC_ARCH_AARCH64 (183 << 16)
/* The artificial cap on the number of segments passed to kexec_load. */
#define KEXEC_SEGMENT_MAX 16
diff --git a/mm/memblock.c b/mm/memblock.c
index f8fab45bfdb7..843c7d15fe53 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -822,6 +822,29 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
}
+/**
+ * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
+{
+ return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
+}
+
+/**
+ * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+ return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
/**
* __next_reserved_mem_region - next function for for_each_reserved_region()
@@ -913,6 +936,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
+ /* skip nomap memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ continue;
+
if (!type_b) {
if (out_start)
*out_start = m_start;
@@ -1022,6 +1049,10 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
+ /* skip nomap memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ continue;
+
if (!type_b) {
if (out_start)
*out_start = m_start;
@@ -1467,15 +1498,16 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}
-void __init memblock_enforce_memory_limit(phys_addr_t limit)
+static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
{
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
struct memblock_region *r;
- if (!limit)
- return;
-
- /* find out max address */
+ /*
+ * translate the memory @limit size into the max address within one of
+ * the memory memblock regions, if the @limit exceeds the total size
+ * of those regions, max_addr will keep original value ULLONG_MAX
+ */
for_each_memblock(memory, r) {
if (limit <= r->size) {
max_addr = r->base + limit;
@@ -1484,6 +1516,22 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
limit -= r->size;
}
+ return max_addr;
+}
+
+void __init memblock_enforce_memory_limit(phys_addr_t limit)
+{
+ phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
+
+ if (!limit)
+ return;
+
+ max_addr = __find_max_addr(limit);
+
+ /* @limit exceeds the total size of the memory, do nothing */
+ if (max_addr == (phys_addr_t)ULLONG_MAX)
+ return;
+
/* truncate both memory and reserved regions */
memblock_remove_range(&memblock.memory, max_addr,
(phys_addr_t)ULLONG_MAX);
@@ -1491,6 +1539,50 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
(phys_addr_t)ULLONG_MAX);
}
+void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
+{
+ int start_rgn, end_rgn;
+ int i, ret;
+
+ if (!size)
+ return;
+
+ ret = memblock_isolate_range(&memblock.memory, base, size,
+ &start_rgn, &end_rgn);
+ if (ret)
+ return;
+
+ /* remove all the MAP regions */
+ for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
+ if (!memblock_is_nomap(&memblock.memory.regions[i]))
+ memblock_remove_region(&memblock.memory, i);
+
+ for (i = start_rgn - 1; i >= 0; i--)
+ if (!memblock_is_nomap(&memblock.memory.regions[i]))
+ memblock_remove_region(&memblock.memory, i);
+
+ /* truncate the reserved regions */
+ memblock_remove_range(&memblock.reserved, 0, base);
+ memblock_remove_range(&memblock.reserved,
+ base + size, (phys_addr_t)ULLONG_MAX);
+}
+
+void __init memblock_mem_limit_remove_map(phys_addr_t limit)
+{
+ phys_addr_t max_addr;
+
+ if (!limit)
+ return;
+
+ max_addr = __find_max_addr(limit);
+
+ /* @limit exceeds the total size of the memory, do nothing */
+ if (max_addr == (phys_addr_t)ULLONG_MAX)
+ return;
+
+ memblock_cap_memory_range(0, max_addr);
+}
+
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
unsigned int left = 0, right = type->cnt;
@@ -1509,16 +1601,25 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
return -1;
}
-int __init memblock_is_reserved(phys_addr_t addr)
+bool __init memblock_is_reserved(phys_addr_t addr)
{
return memblock_search(&memblock.reserved, addr) != -1;
}
-int __init_memblock memblock_is_memory(phys_addr_t addr)
+bool __init_memblock memblock_is_memory(phys_addr_t addr)
{
return memblock_search(&memblock.memory, addr) != -1;
}
+int __init_memblock memblock_is_map_memory(phys_addr_t addr)
+{
+ int i = memblock_search(&memblock.memory, addr);
+
+ if (i == -1)
+ return false;
+ return !memblock_is_nomap(&memblock.memory.regions[i]);
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
unsigned long *start_pfn, unsigned long *end_pfn)
diff --git a/mm/slab.c b/mm/slab.c
index 24a615d42d74..462938fc7cb9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1670,6 +1670,14 @@ static void kmem_rcu_free(struct rcu_head *head)
}
#if DEBUG
+static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
+{
+ if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
+ (cachep->size % PAGE_SIZE) == 0)
+ return true;
+
+ return false;
+}
#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
@@ -1703,6 +1711,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
}
*addr++ = 0x87654321;
}
+
+static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
+ int map, unsigned long caller)
+{
+ if (!is_debug_pagealloc_cache(cachep))
+ return;
+
+ if (caller)
+ store_stackinfo(cachep, objp, caller);
+
+ kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
+}
+
+#else
+static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
+ int map, unsigned long caller) {}
+
#endif
static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
@@ -1781,6 +1806,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
int size, i;
int lines = 0;
+ if (is_debug_pagealloc_cache(cachep))
+ return;
+
realobj = (char *)objp + obj_offset(cachep);
size = cachep->object_size;
@@ -1846,16 +1874,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
void *objp = index_to_obj(cachep, page, i);
if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (cachep->size % PAGE_SIZE == 0 &&
- OFF_SLAB(cachep))
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 1);
- else
- check_poison_obj(cachep, objp);
-#else
check_poison_obj(cachep, objp);
-#endif
+ slab_kernel_map(cachep, objp, 1, 0);
}
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
@@ -2179,7 +2199,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
else
size += BYTES_PER_WORD;
}
-#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
/*
* To activate debug pagealloc, off-slab management is necessary
* requirement. In early phase of initialization, small sized slab
@@ -2187,14 +2206,14 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
- if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
+ !slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
#endif
-#endif
/*
* Determine if the slab management is 'on' or 'off' slab.
@@ -2237,15 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
freelist_size = calculate_freelist_size(cachep->num, 0);
-
-#ifdef CONFIG_PAGE_POISONING
- /* If we're going to use the generic kernel_map_pages()
- * poisoning, then it's going to smash the contents of
- * the redzone and userword anyhow, so switch them off.
- */
- if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
-#endif
}
cachep->colour_off = cache_line_size();
@@ -2261,7 +2271,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
- if (flags & CFLGS_OFF_SLAB) {
+#if DEBUG
+ /*
+ * If we're going to use the generic kernel_map_pages()
+ * poisoning, then it's going to smash the contents of
+ * the redzone and userword anyhow, so switch them off.
+ */
+ if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
+ (cachep->flags & SLAB_POISON) &&
+ is_debug_pagealloc_cache(cachep))
+ cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
+#endif
+
+ if (OFF_SLAB(cachep)) {
cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
* This is a possibility for one of the kmalloc_{dma,}_caches.
@@ -2488,9 +2510,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
#if DEBUG
- /* need to poison the objs? */
- if (cachep->flags & SLAB_POISON)
- poison_obj(cachep, objp, POISON_FREE);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
@@ -2514,10 +2533,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
- if ((cachep->size % PAGE_SIZE) == 0 &&
- OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 0);
+ /* need to poison the objs? */
+ if (cachep->flags & SLAB_POISON) {
+ poison_obj(cachep, objp, POISON_FREE);
+ slab_kernel_map(cachep, objp, 0, 0);
+ }
#else
if (cachep->ctor)
cachep->ctor(objp);
@@ -2736,17 +2756,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
- store_stackinfo(cachep, objp, caller);
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 0);
- } else {
- poison_obj(cachep, objp, POISON_FREE);
- }
-#else
poison_obj(cachep, objp, POISON_FREE);
-#endif
+ slab_kernel_map(cachep, objp, 0, caller);
}
return objp;
}
@@ -2873,15 +2884,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
- kernel_map_pages(virt_to_page(objp),
- cachep->size / PAGE_SIZE, 1);
- else
- check_poison_obj(cachep, objp);
-#else
check_poison_obj(cachep, objp);
-#endif
+ slab_kernel_map(cachep, objp, 1, 0);
poison_obj(cachep, objp, POISON_INUSE);
}
if (cachep->flags & SLAB_STORE_USER)