diff options
Diffstat (limited to 'arch/arm64')
116 files changed, 5261 insertions, 2595 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1d5e13f7a298..a11b5550bd68 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,7 +23,7 @@ config ARM64 select DCACHE_WORD_ACCESS select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IOMAP @@ -37,9 +37,13 @@ config ARM64 select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -88,9 +92,40 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +# max bits determined by the following formula: +# VA_BITS - PAGE_SHIFT - 3 +config ARCH_MMAP_RND_BITS_MAX + default 19 if ARM64_VA_BITS=36 + default 24 if ARM64_VA_BITS=39 + default 27 if ARM64_VA_BITS=42 + default 30 if ARM64_VA_BITS=47 + default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES + default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES + default 33 if ARM64_VA_BITS=48 + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 7 if ARM64_64K_PAGES + default 9 if ARM64_16K_PAGES + default 11 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config NO_IOPORT_MAP def_bool y if !PCI +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config STACKTRACE_SUPPORT def_bool y @@ -131,6 +166,9 @@ config NEED_DMA_MAP_STATE config NEED_SG_DMA_LENGTH def_bool y +config SMP + def_bool y + config SWIOTLB def_bool y @@ -143,6 +181,13 @@ config KERNEL_MODE_NEON config FIX_EARLYCON_MEM def_bool y +config PGTABLE_LEVELS + int + default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 + default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 + default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 + default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -379,34 +424,13 @@ config ARM64_VA_BITS default 42 if ARM64_VA_BITS_42 default 48 if ARM64_VA_BITS_48 -config ARM64_PGTABLE_LEVELS - int - default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 - default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 - default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 - default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 - config CPU_BIG_ENDIAN bool "Build big-endian kernel" help Say Y if you plan on running a kernel in big-endian mode. -config SMP - bool "Symmetric Multi-Processing" - help - This enables support for systems with more than one CPU. If - you say N here, the kernel will run on single and - multiprocessor machines, but will use only one CPU of a - multiprocessor machine. If you say Y here, the kernel will run - on many, but not all, single processor machines. On a single - processor machine, the kernel will run faster if you say N - here. - - If you don't know what to do here, say N. - config SCHED_MC bool "Multi-core scheduler support" - depends on SMP help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly @@ -414,7 +438,6 @@ config SCHED_MC config SCHED_SMT bool "SMT scheduler support" - depends on SMP help Improves the CPU scheduler's decision making when dealing with MultiThreading at a cost of slightly increased overhead in some @@ -422,14 +445,11 @@ config SCHED_SMT config NR_CPUS int "Maximum number of CPUs (2-64)" - range 2 64 - depends on SMP # These have to remain sorted largest to smallest default "64" config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" - depends on SMP help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -481,6 +501,19 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + config XEN_DOM0 def_bool y depends on XEN @@ -497,20 +530,6 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" -config ARM64_PAN - bool "Enable support for Privileged Access Never (PAN)" - default y - help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. - - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. - - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. - menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT @@ -576,8 +595,56 @@ config SETEND_EMULATION be unexpected results in the applications. If unsure, say Y + endif +config ARM64_SW_TTBR0_PAN + bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved + zeroed area and reserved ASID. The user access routines + restore the valid TTBR0_EL1 temporarily. + +menu "ARMv8.1 architectural features" + +config ARM64_PAN + bool "Enable support for Privileged Access Never (PAN)" + default y + help + Privileged Access Never (PAN; part of the ARMv8.1 Extensions) + prevents the kernel or hypervisor from accessing user-space (EL0) + memory directly. + + Choosing this option will cause any unprotected (not using + copy_to_user et al) memory access to fail with a permission fault. + + The feature is detected at runtime, and will remain as a 'nop' + instruction if the cpu does not implement the feature. + +endmenu + +config ARM64_UAO + bool "Enable support for User Access Override (UAO)" + default y + help + User Access Override (UAO; part of the ARMv8.2 Extensions) + causes the 'unprivileged' variant of the load/store instructions to + be overriden to be privileged. + + This option changes get_user() and friends to use the 'unprivileged' + variant of the load/store instructions. This ensures that user-space + really did have access to the supplied memory. When addr_limit is + set to kernel memory the UAO bit will be set, allowing privileged + access to kernel memory. + + Choosing this option will cause copy_to_user() et al to use user-space + memory permissions. + + The feature is detected at runtime, the kernel will use the + regular load/store instructions if the cpu does not implement the + feature. + endmenu menu "Boot options" @@ -590,6 +657,23 @@ config CMDLINE entering them here. As a minimum, you should specify the the root device (e.g. root=/dev/nfs). +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" help @@ -597,6 +681,7 @@ config CMDLINE_FORCE loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +endchoice config EFI_STUB bool @@ -618,6 +703,32 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config BUILD_ARM64_APPENDED_DTB_IMAGE + bool "Build a concatenated Image.gz/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated Image.gz and list of + DTBs to be built by default (instead of a standalone Image.gz.) + The image will built in arch/arm64/boot/Image.gz-dtb + +config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated Image.gz-dtb. + +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 8dd3a551c170..d6285ef9b5f9 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,6 +6,18 @@ config FRAME_POINTER bool default y +config ARM64_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS + help + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU @@ -54,6 +66,29 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. + + If in doubt, say N + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index fa985ad3defc..e30eaed439d9 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,6 +18,7 @@ GZFLAGS :=-9 KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only +KBUILD_CFLAGS += -fno-pic KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) @@ -64,7 +65,12 @@ libs-y := arch/arm64/lib/ $(libs-y) libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ # Default target when executing plain make +ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := Image.gz-dtb +else KBUILD_IMAGE := Image.gz +endif + KBUILD_DTBS := dtbs all: $(KBUILD_IMAGE) $(KBUILD_DTBS) @@ -88,6 +94,9 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +Image-dtb Image.gz-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ @@ -97,6 +106,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb6ae66..34e35209fc2e 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,2 +1,4 @@ Image +Image-dtb Image.gz +Image.gz-dtb diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 5a0e3ab854a5..aa24ed42b42b 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,14 +14,30 @@ # Based on the ia64 boot/Makefile. # +include $(srctree)/arch/arm64/boot/dts/Makefile + targets := Image Image.gz +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) +else +DTB_OBJS := $(shell find $(obj)/dts/ -name \*.dtb) +endif + $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) +$(obj)/Image-dtb: $(obj)/Image $(DTB_OBJS) FORCE + $(call if_changed,cat) + $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) +$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE + $(call if_changed,cat) + install: $(obj)/Image $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6738cb24c058..b65419693e49 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -47,6 +47,22 @@ CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_ADVANCED_DEBUG=y +CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y +CONFIG_CPU_IDLE=y +CONFIG_ARM_CPUIDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT_DETAILS=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPUFREQ_DT=y +CONFIG_ARM_BIG_LITTLE_CPUFREQ=y +CONFIG_ARM_SCPI_CPUFREQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -66,6 +82,7 @@ CONFIG_DEVTMPFS_MOUNT=y CONFIG_DMA_CMA=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y +CONFIG_SRAM=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set @@ -99,6 +116,8 @@ CONFIG_SPI_PL022=y CONFIG_GPIO_PL061=y CONFIG_GPIO_XGENE=y # CONFIG_HWMON is not set +CONFIG_SENSORS_ARM_SCPI=y +CONFIG_SENSORS_V2M_JUNO=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y @@ -122,11 +141,20 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_PL030=y +CONFIG_RTC_DRV_PL031=y CONFIG_RTC_DRV_XGENE=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y +CONFIG_COMMON_CLK_SCPI=y +CONFIG_ARM_TIMER_SP804=y +CONFIG_MAILBOX=y +CONFIG_ARM_MHU=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_PHY_XGENE=y +CONFIG_ARM_SCPI_PROTOCOL=y +CONFIG_DMI_SYSFS=y +CONFIG_EFI_VARS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -147,8 +175,13 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y CONFIG_DEBUG_FS=y +CONFIG_HEADERS_CHECK=y +CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_LOCKUP_DETECTOR=y diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4e3d4c8b50d1..fe34ae0bed91 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,8 +1,12 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#include <asm/cpufeature.h> +#include <asm/insn.h> + #ifndef __ASSEMBLY__ +#include <linux/init.h> #include <linux/kconfig.h> #include <linux/types.h> #include <linux/stddef.h> @@ -16,7 +20,8 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; -void apply_alternatives(void); +void __init apply_alternatives_all(void); +void apply_alternatives(void *start, size_t length); void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ @@ -62,6 +67,8 @@ void free_alternatives_memory(void); #else +#include <asm/assembler.h> + .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len .word \orig_offset - . .word \alt_offset - . @@ -85,34 +92,55 @@ void free_alternatives_memory(void); .endm /* - * Begin an alternative code sequence. + * Alternative sequences * - * The code that follows this macro will be assembled and linked as - * normal. There are no restrictions on this code. + * The code for the case where the capability is not present will be + * assembled and linked as normal. There are no restrictions on this + * code. + * + * The code for the case where the capability is present will be + * assembled into a special section to be used for dynamic patching. + * Code for that case must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. + * + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ + +/* + * Begin an alternative code sequence. */ .macro alternative_if_not cap + .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: .endm +.macro alternative_if cap + .set .Lasm_alt_mode, 1 + .pushsection .altinstructions, "a" + altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f + .popsection + .pushsection .altinstr_replacement, "ax" + .align 2 /* So GAS knows label 661 is suitably aligned */ +661: +.endm + /* - * Provide the alternative code sequence. - * - * The code that follows this macro is assembled into a special - * section to be used for dynamic patching. Code that follows this - * macro must: - * - * 1. Be exactly the same length (in bytes) as the default code - * sequence. - * - * 2. Not contain a branch target that is used outside of the - * alternative sequence it is defined in (branches into an - * alternative sequence are not fixed up). + * Provide the other half of the alternative code sequence. */ .macro alternative_else -662: .pushsection .altinstr_replacement, "ax" +662: + .if .Lasm_alt_mode==0 + .pushsection .altinstr_replacement, "ax" + .else + .popsection + .endif 663: .endm @@ -120,15 +148,97 @@ void free_alternatives_memory(void); * Complete an alternative code sequence. */ .macro alternative_endif -664: .popsection +664: + .if .Lasm_alt_mode==0 + .popsection + .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endm +/* + * Provides a trivial alternative or default sequence consisting solely + * of NOPs. The number of NOPs is chosen automatically to match the + * previous case. + */ +.macro alternative_else_nop_endif +alternative_else + nops (662b-661b) / AARCH64_INSN_SIZE +alternative_endif +.endm + #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +/* + * Generate the assembly for UAO alternatives with exception table entries. + * This is complicated as there is no post-increment or pair versions of the + * unprivileged instructions, and USER() only works for single instructions. + */ +#ifdef CONFIG_ARM64_UAO + .macro uao_ldp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: ldp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + ldtr \reg1, [\addr]; + ldtr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .quad 8889b,\l; + .previous; + .endm + + .macro uao_stp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: stp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + sttr \reg1, [\addr]; + sttr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .quad 8889b,\l; + .previous + .endm + + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: \inst \reg, [\addr], \post_inc; + nop; + alternative_else + \alt_inst \reg, [\addr]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .previous + .endm +#else + .macro uao_ldp l, reg1, reg2, addr, post_inc + USER(\l, ldp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_stp l, reg1, reg2, addr, post_inc + USER(\l, stp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + USER(\l, \inst \reg, [\addr], \post_inc) + .endm +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3579988b23f9..deb27cf40254 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -1,5 +1,5 @@ /* - * Based on arch/arm/include/asm/assembler.h + * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S * * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. @@ -23,6 +23,8 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm/asm-offsets.h> +#include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -49,6 +51,15 @@ msr daifclr, #2 .endm + .macro save_and_disable_irq, flags + mrs \flags, daif + msr daifset, #2 + .endm + + .macro restore_irq, flags + msr daif, \flags + .endm + /* * Save/disable and restore interrupts. */ @@ -103,9 +114,16 @@ * SMP data memory barrier */ .macro smp_dmb, opt -#ifdef CONFIG_SMP dmb \opt -#endif + .endm + +/* + * NOP sequence + */ + .macro nops, num + .rept \num + nop + .endr .endm #define USER(l, x...) \ @@ -218,4 +236,89 @@ lr .req x30 // link register .size __pi_##x, . - x; \ ENDPROC(x) +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm + +/* + * Return the current thread_info. + */ + .macro get_thread_info, rd + mrs \rd, sp_el0 + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index d3d49531ce45..01df231d6220 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,9 @@ #ifndef __ASSEMBLY__ +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) asm volatile(__nops(n)) + #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") #define wfi() asm volatile("wfi" : : : "memory") @@ -32,27 +35,8 @@ #define rmb() dsb(ld) #define wmb() dsb(st) -#ifndef CONFIG_SMP -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() - -#define smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ACCESS_ONCE(*p) = (v); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ___p1; \ -}) - -#else +#define dma_rmb() dmb(oshld) +#define dma_wmb() dmb(oshst) #define smp_mb() dmb(ish) #define smp_rmb() dmb(ishld) @@ -96,8 +80,6 @@ do { \ ___p1; \ }) -#endif - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 689b6379188c..5d46c694e097 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -40,10 +40,6 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT or ASID-tagged VIVT I-cache. * - * flush_cache_all() - * - * Unconditionally clean and invalidate the entire cache. - * * flush_cache_mm(mm) * * Clean and invalidate all user space cache entries @@ -69,7 +65,6 @@ * - kaddr - page address * - size - region size */ -extern void flush_cache_all(void); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); @@ -120,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +static inline void __local_flush_icache_all(void) +{ + asm("ic iallu"); + dsb(nsh); + isb(); +} + static inline void __flush_icache_all(void) { asm("ic ialluis"); @@ -152,4 +154,5 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 56de5aadede2..3fb053fa6e98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -205,6 +205,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 056443086019..13a6103130cd 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -30,13 +30,17 @@ struct cpuinfo_arm64 { u32 reg_dczid; u32 reg_midr; + u64 reg_id_aa64dfr0; + u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; + u64 reg_id_aa64mmfr2; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u32 reg_id_dfr0; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; @@ -49,6 +53,10 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr3; u32 reg_id_pfr0; u32 reg_id_pfr1; + + u32 reg_mvfr0; + u32 reg_mvfr1; + u32 reg_mvfr2; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); @@ -56,4 +64,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); +void __init init_cpu_features(struct cpuinfo_arm64 *info); +void update_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot); + #endif /* __ASM_CPU_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d71140b76773..a0789bfc4ac6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include <asm/hwcap.h> +#include <asm/sysreg.h> /* * In the arm64 world (as in the ARM world), elf_hwcap is used both internally @@ -26,16 +27,55 @@ #define ARM64_WORKAROUND_845719 2 #define ARM64_HAS_SYSREG_GIC_CPUIF 3 #define ARM64_HAS_PAN 4 +#define ARM64_HAS_UAO 5 +#define ARM64_ALT_PAN_NOT_UAO 6 -#define ARM64_NCAPS 5 +#define ARM64_NCAPS 7 #ifndef __ASSEMBLY__ +#include <linux/kernel.h> + +/* CPU feature register tracking */ +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE,/* Bigger value is safe */ +}; + +#define FTR_STRICT true /* SANITY check strict matching required */ +#define FTR_NONSTRICT false /* SANITY check ignored */ + +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + +struct arm64_ftr_bits { + bool sign; /* Value is signed ? */ + bool strict; /* CPU Sanity check: strict matching required ? */ + enum ftr_type type; + u8 shift; + u8 width; + s64 safe_val; /* safe value for discrete features */ +}; + +/* + * @arm64_ftr_reg - Feature register + * @strict_mask Bits which should match across all CPUs for sanity. + * @sys_val Safe value across the CPUs (system view) + */ +struct arm64_ftr_reg { + u32 sys_id; + const char *name; + u64 strict_mask; + u64 sys_val; + struct arm64_ftr_bits *ftr_bits; +}; + struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void); + void (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; @@ -43,8 +83,11 @@ struct arm64_cpu_capabilities { }; struct { /* Feature register checking */ + u32 sys_reg; int field_pos; int min_field_value; + int hwcap_type; + unsigned long hwcap; }; }; }; @@ -72,19 +115,79 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } -static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, - int field) +static inline int __attribute_const__ +cpuid_feature_extract_field_width(u64 features, int field, int width) { - return (s64)(features << (64 - 4 - field)) >> (64 - 4); + return (s64)(features << (64 - width - field)) >> (64 - width); } +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field) +{ + return cpuid_feature_extract_field_width(features, field, 4); +} + +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) +{ + return (u64)(features << (64 - width - field)) >> (64 - width); +} + +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field(u64 features, int field) +{ + return cpuid_feature_extract_unsigned_field_width(features, field, 4); +} + +static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +{ + return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); +} -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +{ + return ftrp->sign ? + cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) : + cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width); +} + +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) +{ + return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; +} + +void __init setup_cpu_features(void); + +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -void check_local_cpu_features(void); -bool cpu_supports_mixed_endian_el0(void); -bool system_supports_mixed_endian_el0(void); + +#ifdef CONFIG_HOTPLUG_CPU +void verify_local_cpu_capabilities(void); +#else +static inline void verify_local_cpu_capabilities(void) +{ +} +#endif + +u64 read_system_reg(u32 id); + +static inline bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(SYS_ID_AA64MMFR0_EL1)); +} + +static inline bool system_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); +} + +static inline bool system_uses_ttbr0_pan(void) +{ + return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && + !cpus_have_cap(ARM64_HAS_PAN); +} #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h deleted file mode 100644 index e3bd983d3661..000000000000 --- a/arch/arm64/include/asm/cputable.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * arch/arm64/include/asm/cputable.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_CPUTABLE_H -#define __ASM_CPUTABLE_H - -struct cpu_info { - unsigned int cpu_id_val; - unsigned int cpu_id_mask; - const char *cpu_name; - unsigned long (*cpu_setup)(void); -}; - -extern struct cpu_info *lookup_processor_type(unsigned int); - -#endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ee6403df9fe4..29dd9c7bc721 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -32,12 +32,6 @@ #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs %0, " #reg : "=r" (__val)); \ - __val; \ -}) - #define MIDR_REVISION_MASK 0xf #define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) #define MIDR_PARTNUM_SHIFT 4 @@ -72,17 +66,16 @@ #define APM_CPU_PART_POTENZA 0x000 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) -#define ID_AA64MMFR0_BIGEND(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) - #ifndef __ASSEMBLY__ +#include <asm/sysreg.h> + +#define read_cpuid(reg) ({ \ + u64 __val; \ + asm("mrs_s %0, " __stringify(reg) : "=r" (__val)); \ + __val; \ +}) + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -90,12 +83,12 @@ */ static inline u32 __attribute_const__ read_cpuid_id(void) { - return read_cpuid(MIDR_EL1); + return read_cpuid(SYS_MIDR_EL1); } static inline u64 __attribute_const__ read_cpuid_mpidr(void) { - return read_cpuid(MPIDR_EL1); + return read_cpuid(SYS_MPIDR_EL1); } static inline unsigned int __attribute_const__ read_cpuid_implementor(void) @@ -110,13 +103,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void) static inline u32 __attribute_const__ read_cpuid_cachetype(void) { - return read_cpuid(CTR_EL0); -} - -static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -{ - return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || - (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); + return read_cpuid(SYS_CTR_EL0); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..69d37d87b159 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,31 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> + +/* + * According to section 2.3.6 of the UEFI spec, the firmware should not + * request a virtual mapping for configuration tables such as SMBIOS. + * This means we have to map them before use. + */ +#define dmi_early_remap(x, l) ioremap_cache(x, l) +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap(x, l) ioremap_cache(x, l) +#define dmi_unmap(x) iounmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a34fd3b12e2b..180fe12bb334 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -1,34 +1,39 @@ #ifndef _ASM_EFI_H #define _ASM_EFI_H +#include <asm/cpufeature.h> #include <asm/io.h> #include <asm/neon.h> #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_idmap_init(void); #else #define efi_init() -#define efi_idmap_init() #endif #define efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ efi_status_t __s; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ __s; \ }) #define __efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ }) @@ -44,4 +49,22 @@ extern void efi_idmap_init(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define EFI_ALLOC_ALIGN SZ_64K + +/* + * On ARM systems, virtually remapped UEFI runtime services are set up in two + * distinct stages: + * - The stub retrieves the final version of the memory map from UEFI, populates + * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime + * service to communicate the new mapping to the firmware (Note that the new + * mapping is not live at this time) + * - During an early initcall(), the EFI system table is permanently remapped + * and the virtual remapping of the UEFI Runtime Services regions is loaded + * into a private set of page tables. If this all succeeds, the Runtime + * Services are enabled and the EFI_RUNTIME_SERVICES bit set. + */ + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 72674f4c3871..7cff572082d0 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -54,4 +54,90 @@ #define ESR_EL1_EC_BKPT32 (0x38) #define ESR_EL1_EC_BRK64 (0x3C) +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) +/* Unallocated EC: 0x09 - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) +#define ESR_ELx_EC_SMC32 (0x13) +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) +#define ESR_ELx_EC_SMC64 (0x17) +#define ESR_ELx_EC_SYS64 (0x18) +/* Unallocated EC: 0x19 - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) +/* Unallocted EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL (UL(1) << 25) +#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISV (UL(1) << 24) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE (UL(1) << 21) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF (UL(1) << 15) +#define ESR_ELx_AR (UL(1) << 14) +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_CM (UL(1) << 8) +#define ESR_ELx_S1PTW (UL(1) << 7) +#define ESR_ELx_WNR (UL(1) << 6) +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +const char *esr_get_class_string(u32 esr); +#endif /* __ASSEMBLY */ + #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 5f7bfe6df723..defa0ff98250 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -31,6 +31,7 @@ * */ enum fixed_addresses { + FIX_HOLE, FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, @@ -48,6 +49,7 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; @@ -56,10 +58,11 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) -extern void __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); +void __init early_fixmap_init(void); -#define __set_fixmap __early_set_fixmap +#define __early_set_fixmap __set_fixmap + +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); #include <asm-generic/fixmap.h> diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 667346273d9b..e8272c475bb5 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -21,15 +21,12 @@ #include <linux/futex.h> #include <linux/uaccess.h> -#include <asm/alternative.h> -#include <asm/cpufeature.h> #include <asm/errno.h> -#include <asm/sysreg.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ +do { \ + uaccess_enable(); \ asm volatile( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ @@ -45,11 +42,11 @@ " .align 3\n" \ " .quad 1b, 4b, 2b, 4b\n" \ " .popsection\n" \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -119,6 +116,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -138,6 +136,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); + uaccess_disable(); *uval = val; return ret; diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index e8a3268a891c..0af4cdb4b5a9 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -24,9 +24,7 @@ typedef struct { unsigned int __softirq_pending; -#ifdef CONFIG_SMP unsigned int ipi_irqs[NR_IPI]; -#endif } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ @@ -34,10 +32,8 @@ typedef struct { #define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ #define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) -#ifdef CONFIG_SMP u64 smp_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu smp_irq_stat_cpu -#endif #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 52b484b6aa1a..77667c30a9ad 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -65,7 +65,11 @@ static inline void decode_ctrl_reg(u32 reg, /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 #define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_3 0x7 #define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_5 0x1f +#define ARM_BREAKPOINT_LEN_6 0x3f +#define ARM_BREAKPOINT_LEN_7 0x7f #define ARM_BREAKPOINT_LEN_8 0xff /* Kernel stepping */ @@ -107,7 +111,7 @@ struct perf_event; struct pmu; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type); + int *gen_len, int *gen_type, int *offset); extern int arch_check_bp_in_kernelspace(struct perf_event *bp); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0ad735166d9f..400b80b49595 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -52,6 +52,14 @@ extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif +enum { + CAP_HWCAP = 1, +#ifdef CONFIG_COMPAT + CAP_COMPAT_HWCAP, + CAP_COMPAT_HWCAP2, +#endif +}; + extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e2ff32a93b5c..30e50eb54a67 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -264,8 +264,10 @@ __AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000) __AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000) __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) __AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) -__AARCH64_INSN_FUNCS(cbz, 0xFE000000, 0x34000000) -__AARCH64_INSN_FUNCS(cbnz, 0xFE000000, 0x35000000) +__AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) +__AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000) +__AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000) +__AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000) __AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000) __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) @@ -279,10 +281,12 @@ __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) #undef __AARCH64_INSN_FUNCS bool aarch64_insn_is_nop(u32 insn); +bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, @@ -348,6 +352,8 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +s32 aarch64_get_branch_offset(u32 insn); +u32 aarch64_set_branch_offset(u32 insn, s32 offset); bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e1f7ecdde11f..1eebf5bb0b58 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,7 +3,6 @@ #include <asm-generic/irq.h> -extern void (*handle_arch_irq)(struct pt_regs *); extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index b4f6b19a8a68..8e24ef3f7c82 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -1,8 +1,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#ifdef CONFIG_SMP - #include <asm/smp.h> static inline bool arch_irq_work_has_interrupt(void) @@ -10,13 +8,4 @@ static inline bool arch_irq_work_has_interrupt(void) return !!__smp_cross_call; } -#else - -static inline bool arch_irq_work_has_interrupt(void) -{ - return false; -} - -#endif - #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h new file mode 100644 index 000000000000..d06d7f06a583 --- /dev/null +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -0,0 +1,74 @@ +/* + * Kernel page table mapping + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_KERNEL_PGTABLE_H +#define __ASM_KERNEL_PGTABLE_H + +#include <asm/page.h> +#include <asm/pgtable.h> + +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so 3 pages are reserved in all cases. + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#endif + +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +#define RESERVED_TTBR0_SIZE (PAGE_SIZE) +#else +#define RESERVED_TTBR0_SIZE (0) +#endif + +/* Initial memory map size */ +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT +#define SWAPPER_BLOCK_SIZE SECTION_SIZE +#define SWAPPER_TABLE_SHIFT PUD_SHIFT +#endif + + +/* + * Initial memory map attributes. + */ +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#endif + + +#endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 865a7e28ea2d..3cb4c856b10d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -45,6 +45,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_RW; } +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hcr_el2; +} + +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +{ + vcpu->arch.hcr_el2 = hcr; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index dbd32127dbb6..806a12128193 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -116,9 +116,6 @@ struct kvm_vcpu_arch { * Anything that is not used directly from assembly code goes * here. */ - /* dcache set/way operation pending */ - int last_pcpu; - cpumask_t require_dcache_flush; /* Don't run the guest */ bool pause; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a205e957d5c4..390bf1230c69 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -142,12 +142,12 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) /* * If we are concatenating first level stage-2 page tables, we would have less * than or equal to 16 pointers in the fake PGD, because that's what the - * architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS) + * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) * represents the first level for the host, and we add 1 to go to the next * level (which uses contatenation) for the stage-2 tables. */ #if PTRS_PER_S2_PGD <= 16 -#define KVM_PREALLOC_LEVEL (4 - CONFIG_ARM64_PGTABLE_LEVELS + 1) +#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) #else #define KVM_PREALLOC_LEVEL (0) #endif @@ -245,7 +245,8 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) -void stage2_flush_vm(struct kvm *kvm); +void kvm_set_way_flush(struct kvm_vcpu *vcpu); +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e4a2ef9cf998..fe3d97595c1e 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -75,12 +75,6 @@ #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) @@ -147,7 +141,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) #endif diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c2f006c48bdb..77c3851deae1 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,21 +17,22 @@ #define __ASM_MMU_H typedef struct { - unsigned int id; - raw_spinlock_t id_lock; - void *vdso; + atomic64_t id; + void *vdso; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), - -#define ASID(mm) ((mm)->context.id & 0xffff) +/* + * This macro is only used by the TLBI code, which cannot race with an + * ASID change and therefore doesn't need to reload the counter using + * atomic64_read. + */ +#define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); -extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); -/* create an identity mapping for memory (or io if map_io is true) */ -extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); +extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot); #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 101a42bde728..b61e47aea3a1 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -23,18 +23,12 @@ #include <linux/sched.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/proc-fns.h> #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> -#define MAX_ASID_BITS 16 - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); - #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { @@ -55,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" @@ -64,68 +58,53 @@ static inline void cpu_set_reserved_ttbr0(void) : "r" (ttbr)); } -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} +/* + * TCR.T0SZ value to use when the ID map is active. Usually equals + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in + * physical memory, in which case it will be smaller. + */ +extern u64 idmap_t0sz; -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) +static inline bool __cpu_uses_extended_idmap(void) { - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled. - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); + return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) && + unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) -#define destroy_context(mm) do { } while(0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) +/* + * Set TCR.T0SZ to its default value (based on VA_BITS) + */ +static inline void cpu_set_default_tcr_t0sz(void) { - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - unsigned long flags; + unsigned long tcr; - __new_context(mm); + if (!__cpu_uses_extended_idmap()) + return; - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); - } + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } /* + * It would be nice to return ASIDs back to the allocator, but unfortunately + * that introduces a race with a generation rollover where we could erroneously + * free an ASID allocated in a future generation. We could workaround this by + * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap), + * but we'd then need to make sure that we didn't dirty any TLBs afterwards. + * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you + * take CPU migration into account. + */ +#define destroy_context(mm) do { } while(0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); + +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) + +/* * This is called when "tsk" is about to enter lazy TLB mode. * * mm: describes the currently active mm context @@ -139,15 +118,24 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -/* - * This is the actual mm switch as far as the scheduler - * is concerned. No registers are touched. We avoid - * calling the CPU specific function when the mm hasn't - * actually changed. - */ -static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) +{ + if (system_uses_ttbr0_pan()) { + BUG_ON(mm->pgd == swapper_pg_dir); + task_thread_info(tsk)->ttbr0 = + virt_to_phys(mm->pgd) | ASID(mm) << 48; + } +} +#else +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) +{ +} +#endif + +static inline void __switch_mm(struct mm_struct *next) { unsigned int cpu = smp_processor_id(); @@ -160,11 +148,28 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, return; } - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) - check_and_switch_context(next, tsk); + check_and_switch_context(next, cpu); +} + +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (prev != next) + __switch_mm(next); + + /* + * Update the saved TTBR0_EL1 of the scheduled-in task as the previous + * value may have not been initialised yet (activate_mm caller) or the + * ASID has changed since the last run (following the context switch + * of another thread of the same process). Avoid setting the reserved + * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). + */ + if (next != &init_mm) + update_saved_ttbr0(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) +#define activate_mm(prev,next) switch_mm(prev, next, current) #endif diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 22b16232bd60..da3235494ffd 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -20,29 +20,19 @@ #define __ASM_PAGE_H /* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 +#define CONT_SHIFT 5 #else #define PAGE_SHIFT 12 +#define CONT_SHIFT 4 #endif -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1) -#endif - -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE) +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 5279e5733386..63e2c49793e7 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,8 +16,6 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H -#ifdef CONFIG_SMP - static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -38,12 +36,6 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() -#else /* !CONFIG_SMP */ - -#define set_my_cpu_offset(x) do { } while (0) - -#endif /* CONFIG_SMP */ - #include <asm-generic/percpu.h> #endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index d26d1d53c0d7..6471773db6fd 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -24,4 +24,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->pc = (__ip); \ + (regs)->regs[29] = (unsigned long) __builtin_frame_address(0); \ + (regs)->sp = current_stack_pointer; \ + (regs)->pstate = PSR_MODE_EL1h; \ +} + #endif diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index e838b9adc4d6..c15053902942 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -29,7 +29,7 @@ #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -47,9 +47,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -67,7 +67,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 31e6b0477e60..5e50782f625d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -21,7 +21,7 @@ /* * PMD_SHIFT determines the size a level 2 page table entry can map. */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -31,7 +31,7 @@ /* * PUD_SHIFT determines the size a level 1 page table entry can map. */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 #define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) @@ -42,7 +42,7 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). */ -#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3) +#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) @@ -141,7 +141,12 @@ /* * TCR flags. */ -#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0)) +#define TCR_T0SZ_OFFSET 0 +#define TCR_T1SZ_OFFSET 16 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) +#define TCR_TxSZ_WIDTH 6 #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index ca9df80af896..2b1bd7e52c3b 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -38,13 +38,13 @@ typedef struct { pteval_t pte; } pte_t; #define pte_val(x) ((x).pte) #define __pte(x) ((pte_t) { (x) } ) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; #define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) } ) @@ -64,13 +64,13 @@ typedef pteval_t pte_t; #define pte_val(x) (x) #define __pte(x) (x) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 typedef pmdval_t pmd_t; #define pmd_val(x) (x) #define __pmd(x) (x) #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 typedef pudval_t pud_t; #define pud_val(x) (x) #define __pud(x) (x) @@ -86,9 +86,9 @@ typedef pteval_t pgprot_t; #endif /* STRICT_MM_TYPECHECKS */ -#if CONFIG_ARM64_PGTABLE_LEVELS == 2 +#if CONFIG_PGTABLE_LEVELS == 2 #include <asm-generic/pgtable-nopmd.h> -#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 +#elif CONFIG_PGTABLE_LEVELS == 3 #include <asm-generic/pgtable-nopud.h> #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 239192d72a7b..abe6186dc975 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -63,13 +63,8 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#ifdef CONFIG_SMP #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#else -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#endif #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) @@ -120,8 +115,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) @@ -273,6 +268,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pgprot_t mk_sect_prot(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); +} + /* * THP definitions. */ @@ -347,9 +347,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #ifdef CONFIG_ARM64_64K_PAGES #define pud_sect(pud) (0) +#define pud_table(pud) (1) #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) +#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ + PUD_TYPE_TABLE) #endif static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) @@ -377,7 +380,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) */ #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) @@ -412,9 +415,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pud_page(pud) pmd_page(pud_pmd(pud)) -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) @@ -446,7 +449,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); } -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -519,6 +522,21 @@ extern int kern_addr_valid(unsigned long addr); #define pgtable_cache_init() do { } while (0) +/* + * On AArch64, the cache coherency is handled via the set_pte_at() function. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + /* + * We don't do anything here, so there's a very small chance of + * us retaking a user fault which we just fixed up. The alternative + * is doing a dsb(ishst), but that penalises the fastpath. + */ +} + +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 9a8fd84f8fb2..b5240e868784 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -28,18 +28,18 @@ struct mm_struct; struct cpu_suspend_ctx; -extern void cpu_cache_off(void); extern void cpu_do_idle(void); extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); -extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); -void cpu_soft_restart(phys_addr_t cpu_reset, - unsigned long addr) __attribute__((noreturn)); extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include <asm/memory.h> -#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) +#define cpu_switch_mm(pgd,mm) \ +do { \ + BUG_ON(pgd == swapper_pg_dir); \ + cpu_do_switch_mm(virt_to_phys(pgd),mm); \ +} while (0) #define cpu_get_pgd() \ ({ \ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f3a965ed5259..f0854ea2ff04 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -166,6 +166,7 @@ static inline void spin_lock_prefetch(const void *x) #endif -void cpu_enable_pan(void); +void cpu_enable_pan(void *__unused); +void cpu_enable_uao(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index d6dd9fdbc3be..bc358b525695 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -116,6 +116,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) @@ -183,11 +185,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) #define instruction_pointer(regs) ((unsigned long)(regs)->pc) -#ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 000000000000..c76fac979629 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include <asm/unistd.h> + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include <asm-generic/seccomp.h> + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 780f82c827b6..295cd8b0ef06 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -20,10 +20,6 @@ #include <linux/cpumask.h> #include <linux/thread_info.h> -#ifndef CONFIG_SMP -# error "<asm/smp.h> included in non-SMP build" -#endif - #define raw_smp_processor_id() (current_thread_info()->cpu) struct seq_file; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a7f3d4b2514d..0cc436bf726b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,10 +20,9 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include <asm/opcodes.h> +#include <linux/stringify.h> -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) +#include <asm/opcodes.h> /* * ARMv8 ARM reserves the following encoding for system registers: @@ -38,11 +37,146 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define SCTLR_EL1_SPAN (1 << 23) +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ + (!!x)<<8 | 0x1f) + +/* SCTLR_EL1 */ +#define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) +#define SCTLR_EL1_SPAN (0x1 << 23) + + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_UAO_SHIFT 4 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + #ifdef __ASSEMBLY__ @@ -61,6 +195,8 @@ #else +#include <linux/types.h> + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ __reg_num_x\\num, \\num\n" @@ -85,6 +221,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 7a18fabbe0f6..659fbf5925de 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -41,7 +41,6 @@ struct mm_struct; extern void show_pte(struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); -void soft_restart(unsigned long); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); #define UDBG_UNDEFINED (1 << 0) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 459bf8e53208..1b0c8e12e79f 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -48,7 +48,9 @@ struct thread_info { mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - struct restart_block restart_block; +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ +#endif int preempt_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ }; @@ -60,9 +62,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) @@ -78,10 +77,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. + */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ @@ -118,7 +123,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ -#define TIF_SWITCH_MM 23 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 53d9c354219f..ffdaea7954bb 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,12 +37,21 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - if (tlb->fullmm) { - flush_tlb_mm(tlb->mm); - } else { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; - flush_tlb_range(&vma, tlb->start, tlb->end); - } + struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + + /* + * The ASID allocator will either invalidate the ASID or mark + * it as used. + */ + if (tlb->fullmm) + return; + + /* + * The intermediate page table levels are already handled by + * the __(pte|pmd|pud)_free_tlb() functions, so last level + * TLBI is sufficient here. + */ + __flush_tlb_range(&vma, tlb->start, tlb->end, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, @@ -53,7 +62,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb_remove_entry(tlb, pte); } -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { @@ -62,7 +71,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 8b8d8cb46e01..b460ae28e346 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -24,17 +24,10 @@ #include <linux/sched.h> #include <asm/cputype.h> -extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); -extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long); - -extern struct cpu_tlb_fns cpu_tlb; - /* * TLB Management * ============== * - * The arch/arm64/mm/tlb.S files implement these methods. - * * The TLB specific code is expected to perform whatever tests it needs * to determine if it should invalidate the TLB for each call. Start * addresses are inclusive and end addresses are exclusive; it is safe to @@ -70,6 +63,14 @@ extern struct cpu_tlb_fns cpu_tlb; * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ +static inline void local_flush_tlb_all(void) +{ + dsb(nshst); + asm("tlbi vmalle1"); + dsb(nsh); + isb(); +} + static inline void flush_tlb_all(void) { dsb(ishst); @@ -80,7 +81,7 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = (unsigned long)ASID(mm) << 48; + unsigned long asid = ASID(mm) << 48; dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); @@ -90,31 +91,59 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | - ((unsigned long)ASID(vma->vm_mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vae1is, %0" : : "r" (addr)); + asm("tlbi vale1is, %0" : : "r" (addr)); dsb(ish); } +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + static inline void __flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + bool last_level) { - unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; + unsigned long asid = ASID(vma->vm_mm) << 48; unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_mm(vma->vm_mm); + return; + } + start = asid | (start >> 12); end = asid | (end >> 12); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vae1is, %0" : : "r"(addr)); + for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + if (last_level) + asm("tlbi vale1is, %0" : : "r"(addr)); + else + asm("tlbi vae1is, %0" : : "r"(addr)); + } dsb(ish); } -static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + __flush_tlb_range(vma, start, end, false); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_all(); + return; + } + start >>= 12; end >>= 12; @@ -126,55 +155,17 @@ static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long e } /* - * This is meant to avoid soft lock-ups on large TLB flushing ranges and not - * necessarily a performance improvement. - */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_range(vma, start, end); - else - flush_tlb_mm(vma->vm_mm); -} - -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_kernel_range(start, end); - else - flush_tlb_all(); -} - -/* * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); dsb(ish); } -/* - * On AArch64, the cache coherency is handled via the set_pte_at() function. - */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - /* - * set_pte() does not have a DSB for user mappings, so make sure that - * the page table write is visible. - */ - dsb(ishst); -} - -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #endif diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7ebcd31ce51c..3a5bbb45537f 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,6 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H -#ifdef CONFIG_SMP - #include <linux/cpumask.h> struct cpu_topology { @@ -24,12 +22,14 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); -#else - -static inline void init_cpu_topology(void) { } -static inline void store_cpu_topology(unsigned int cpuid) { } - +struct sched_domain; +#ifdef CONFIG_CPU_FREQ +#define arch_scale_freq_capacity cpufreq_scale_freq_capacity +extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +extern unsigned long cpufreq_scale_max_freq_capacity(int cpu); #endif +#define arch_scale_cpu_capacity scale_cpu_capacity +extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); #include <asm-generic/topology.h> diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 14450f742554..21963026efd0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,12 @@ #ifndef __ASM_UACCESS_H #define __ASM_UACCESS_H +#include <asm/alternative.h> +#include <asm/kernel-pgtable.h> +#include <asm/sysreg.h> + +#ifndef __ASSEMBLY__ + /* * User space memory access functions */ @@ -25,10 +31,8 @@ #include <linux/string.h> #include <linux/thread_info.h> -#include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/ptrace.h> -#include <asm/sysreg.h> #include <asm/errno.h> #include <asm/memory.h> #include <asm/compiler.h> @@ -65,6 +69,16 @@ extern int fixup_exception(struct pt_regs *regs); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Enable/disable UAO so that copy_to_user() etc can access + * kernel memory with the unprivileged instructions. + */ + if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); } #define segment_eq(a,b) ((a) == (b)) @@ -115,6 +129,99 @@ static inline void set_fs(mm_segment_t fs) #define user_addr_max get_fs /* + * User access enabling/disabling. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void __uaccess_ttbr0_disable(void) +{ + unsigned long ttbr; + + /* reserved_ttbr0 placed at the end of swapper_pg_dir */ + ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; + write_sysreg(ttbr, ttbr0_el1); + isb(); +} + +static inline void __uaccess_ttbr0_enable(void) +{ + unsigned long flags; + + /* + * Disable interrupts to avoid preemption between reading the 'ttbr0' + * variable and the MSR. A context switch could trigger an ASID + * roll-over and an update of 'ttbr0'. + */ + local_irq_save(flags); + write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + isb(); + local_irq_restore(flags); +} + +static inline bool uaccess_ttbr0_disable(void) +{ + if (!system_uses_ttbr0_pan()) + return false; + __uaccess_ttbr0_disable(); + return true; +} + +static inline bool uaccess_ttbr0_enable(void) +{ + if (!system_uses_ttbr0_pan()) + return false; + __uaccess_ttbr0_enable(); + return true; +} +#else +static inline bool uaccess_ttbr0_disable(void) +{ + return false; +} + +static inline bool uaccess_ttbr0_enable(void) +{ + return false; +} +#endif + +#define __uaccess_disable(alt) \ +do { \ + if (!uaccess_ttbr0_disable()) \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +#define __uaccess_enable(alt) \ +do { \ + if (!uaccess_ttbr0_enable()) \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +static inline void uaccess_disable(void) +{ + __uaccess_disable(ARM64_HAS_PAN); +} + +static inline void uaccess_enable(void) +{ + __uaccess_enable(ARM64_HAS_PAN); +} + +/* + * These functions are no-ops when UAO is present. + */ +static inline void uaccess_disable_not_uao(void) +{ + __uaccess_disable(ARM64_ALT_PAN_NOT_UAO); +} + +static inline void uaccess_enable_not_uao(void) +{ + __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); +} + +/* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" * call. @@ -122,9 +229,10 @@ static inline void set_fs(mm_segment_t fs) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_user_asm(instr, reg, x, addr, err) \ +#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup, \"ax\"\n" \ " .align 2\n" \ @@ -143,27 +251,29 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ + uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -190,9 +300,10 @@ do { \ ((x) = 0, -EFAULT); \ }) -#define __put_user_asm(instr, reg, x, addr, err) \ +#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -210,26 +321,28 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __put_user_asm("str", "%", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN)); \ + uaccess_disable_not_uao(); \ } while (0) #define __put_user(x, ptr) \ @@ -256,24 +369,39 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else /* security hole - plug it */ + if (access_ok(VERIFY_READ, from, n)) { + check_object_size(to, n, false); + n = __arch_copy_from_user(to, from, n); + } else /* security hole - plug it */ memset(to, 0, n); return n; } static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + if (access_ok(VERIFY_WRITE, to, n)) { + check_object_size(from, n, true); + n = __arch_copy_to_user(to, from, n); + } return n; } @@ -299,4 +427,66 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +#else /* __ASSEMBLY__ */ + +#include <asm/assembler.h> + +/* + * User access enabling/disabling macros. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + .macro __uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro __uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_disable, tmp1 +alternative_if_not ARM64_HAS_PAN + __uaccess_ttbr0_disable \tmp1 +alternative_else_nop_endif + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + __uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else_nop_endif + .endm +#else + .macro uaccess_ttbr0_disable, tmp1 + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 + .endm +#endif + +/* + * These macros are no-ops when UAO is present. + */ + .macro uaccess_disable_not_uao, tmp1 + uaccess_ttbr0_disable \tmp1 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(1) +alternative_else_nop_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 + uaccess_ttbr0_enable \tmp1, \tmp2 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(0) +alternative_else_nop_endif + .endm + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6d2bf419431d..49c9aefd24a5 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -31,6 +31,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 9dfdac4a74a1..8893cebcea5b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -787,7 +787,8 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 382 __SYSCALL(__NR_renameat2, sys_renameat2) - /* 383 for seccomp */ +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_seccomp) #define __NR_getrandom 384 __SYSCALL(__NR_getrandom, sys_getrandom) #define __NR_memfd_create 385 diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199673d7..2b9a63771eda 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 208db3df135a..b5c3933ed441 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -45,6 +45,7 @@ #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 #define PSR_PAN_BIT 0x00400000 +#define PSR_UAO_BIT 0x00800000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index fd65134d1053..29065295fbdd 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,18 +14,19 @@ CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg # Object file lists. -arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ +arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ - return_address.o cpuinfo.o cpu_errata.o cpufeature.o alternative.o + return_address.o cpuinfo.o cpu_errata.o \ + cpufeature.o alternative.o \ + smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o \ + sys_compat.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o @@ -41,7 +42,3 @@ obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o extra-y := $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 1a3badab800a..392c9193da54 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,16 +24,77 @@ #include <asm/cacheflush.h> #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/insn.h> #include <linux/stop_machine.h> +#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) +#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) +#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -static int __apply_alternatives(void *dummy) +struct alt_region { + struct alt_instr *begin; + struct alt_instr *end; +}; + +/* + * Check if the target PC is within an alternative block. + */ +static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +{ + unsigned long replptr; + + if (kernel_text_address(pc)) + return 1; + + replptr = (unsigned long)ALT_REPL_PTR(alt); + if (pc >= replptr && pc <= (replptr + alt->alt_len)) + return 0; + + /* + * Branching into *another* alternate sequence is doomed, and + * we're not even trying to fix it up. + */ + BUG(); +} + +static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) +{ + u32 insn; + + insn = le32_to_cpu(*altinsnptr); + + if (aarch64_insn_is_branch_imm(insn)) { + s32 offset = aarch64_get_branch_offset(insn); + unsigned long target; + + target = (unsigned long)altinsnptr + offset; + + /* + * If we're branching inside the alternate sequence, + * do not rewrite the instruction, as it is already + * correct. Otherwise, generate the new instruction. + */ + if (branch_insn_requires_update(alt, target)) { + offset = target - (unsigned long)insnptr; + insn = aarch64_set_branch_offset(insn, offset); + } + } + + return insn; +} + +static void __apply_alternatives(void *alt_region) { struct alt_instr *alt; - u8 *origptr, *replptr; + struct alt_region *region = alt_region; + u32 *origptr, *replptr; + + for (alt = region->begin; alt < region->end; alt++) { + u32 insn; + int i, nr_inst; - for (alt = __alt_instructions; alt < __alt_instructions_end; alt++) { if (!cpus_have_cap(alt->cpufeature)) continue; @@ -41,20 +102,60 @@ static int __apply_alternatives(void *dummy) pr_info_once("patching kernel code\n"); - origptr = (u8 *)&alt->orig_offset + alt->orig_offset; - replptr = (u8 *)&alt->alt_offset + alt->alt_offset; - memcpy(origptr, replptr, alt->alt_len); + origptr = ALT_ORIG_PTR(alt); + replptr = ALT_REPL_PTR(alt); + nr_inst = alt->alt_len / sizeof(insn); + + for (i = 0; i < nr_inst; i++) { + insn = get_alt_insn(alt, origptr + i, replptr + i); + *(origptr + i) = cpu_to_le32(insn); + } + flush_icache_range((uintptr_t)origptr, - (uintptr_t)(origptr + alt->alt_len)); + (uintptr_t)(origptr + nr_inst)); + } +} + +/* + * We might be patching the stop_machine state machine, so implement a + * really simple polling protocol here. + */ +static int __apply_alternatives_multi_stop(void *unused) +{ + static int patched = 0; + struct alt_region region = { + .begin = __alt_instructions, + .end = __alt_instructions_end, + }; + + /* We always have a CPU 0 at this point (__init) */ + if (smp_processor_id()) { + while (!READ_ONCE(patched)) + cpu_relax(); + } else { + BUG_ON(patched); + __apply_alternatives(®ion); + /* Barriers provided by the cache flushing */ + WRITE_ONCE(patched, 1); } return 0; } -void apply_alternatives(void) +void __init apply_alternatives_all(void) { /* better not try code patching on a live SMP system */ - stop_machine(__apply_alternatives, NULL, NULL); + stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); +} + +void apply_alternatives(void *start, size_t length) +{ + struct alt_region region = { + .begin = start, + .end = start + length, + }; + + __apply_alternatives(®ion); } void free_alternatives_memory(void) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 3b6d8cc9dfe0..c654df05b7d7 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -33,8 +33,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index bcee7abac68e..61668a44666d 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -14,7 +14,6 @@ #include <linux/slab.h> #include <linux/sysctl.h> -#include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/insn.h> #include <asm/opcodes.h> @@ -62,7 +61,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +172,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +236,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; @@ -281,9 +280,9 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) * Error-checking SWP macros implemented using ldxr{b}/stxr{b} */ #define __user_swpX_asm(data, addr, res, temp, B) \ +do { \ + uaccess_enable(); \ __asm__ __volatile__( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ " mov %w2, %w1\n" \ "0: ldxr"B" %w1, [%3]\n" \ "1: stxr"B" %w0, %w2, [%3]\n" \ @@ -300,11 +299,11 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) " .quad 0b, 3b\n" \ " .quad 1b, 3b\n" \ " .popsection\n" \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) #define __user_swp_asm(data, addr, res, temp) \ __user_swpX_asm(data, addr, res, temp, "") diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9a9fce090d58..92a5d25bff1a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,7 +24,6 @@ #include <linux/kvm_host.h> #include <asm/thread_info.h> #include <asm/memory.h> -#include <asm/cputable.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/vdso_datapage.h> @@ -40,6 +39,9 @@ int main(void) DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TSK_TI_TTBR0, offsetof(struct thread_info, ttbr0)); +#endif BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); @@ -60,9 +62,10 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); @@ -71,15 +74,13 @@ int main(void) BLANK(); DEFINE(PAGE_SZ, PAGE_SIZE); BLANK(); - DEFINE(CPU_INFO_SZ, sizeof(struct cpu_info)); - DEFINE(CPU_INFO_SETUP, offsetof(struct cpu_info, cpu_setup)); - BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -87,6 +88,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -94,7 +97,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6ffd91438560..09eab326ef93 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -88,5 +88,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = { void check_local_cpu_errata(void) { - check_cpu_capabilities(arm64_errata, "enabling workaround for"); + update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index cce952440c64..d63576c0a17a 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -28,9 +28,7 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { -#ifdef CONFIG_SMP &smp_spin_table_ops, -#endif &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 978fa169d3c3..58347534d765 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -16,36 +16,604 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#define pr_fmt(fmt) "alternatives: " fmt +#define pr_fmt(fmt) "CPU features: " fmt +#include <linux/bsearch.h> +#include <linux/sort.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> +#include <asm/cpu_ops.h> #include <asm/processor.h> +#include <asm/sysreg.h> -static bool -feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) +unsigned long elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + +DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcaps); + +#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + { \ + .sign = SIGNED, \ + .strict = STRICT, \ + .type = TYPE, \ + .shift = SHIFT, \ + .width = WIDTH, \ + .safe_val = SAFE_VAL, \ + } + +/* Define a feature with signed values */ +#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +/* Define a feature with unsigned value */ +#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +#define ARM64_FTR_END \ + { \ + .width = 0, \ + } + +/* meta feature for alternatives */ +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); + +static struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + /* Linux doesn't care about the EL3 */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + /* Linux shouldn't care about secure memory */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + /* + * Differing PARange is fine as long as all peripherals and memory are mapped + * within the minimum PARange of all CPUs + */ + U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_ctr[] = { + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + /* + * Linux can handle differing I-cache policies. Userspace JITs will + * make use of *minLine + */ + U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_mvfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_dczid[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_END, +}; + + +static struct arm64_ftr_bits ftr_id_isar5[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_END, +}; + +/* + * Common ftr bits for a 32bit register with all hidden, strict + * attributes, with 4bit feature fields and a default safe value of + * 0. Covers the following 32bit registers: + * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + */ +static struct arm64_ftr_bits ftr_generic_32bits[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic32[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_aa64raz[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +#define ARM64_FTR_REG(id, table) \ + { \ + .sys_id = id, \ + .name = #id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct arm64_ftr_reg arm64_ftr_regs[] = { + + /* Op1 = 0, CRn = 0, CRm = 1 */ + ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), + ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), + + /* Op1 = 0, CRn = 0, CRm = 2 */ + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), + ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + + /* Op1 = 0, CRn = 0, CRm = 3 */ + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + + /* Op1 = 0, CRn = 0, CRm = 4 */ + ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 5 */ + ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), + ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic), + + /* Op1 = 0, CRn = 0, CRm = 6 */ + ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 7 */ + ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), + ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), + + /* Op1 = 3, CRn = 0, CRm = 0 */ + ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), + + /* Op1 = 3, CRn = 14, CRm = 0 */ + ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32), +}; + +static int search_cmp_ftr_reg(const void *id, const void *regp) { - int val = cpuid_feature_extract_field(reg, entry->field_pos); + return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; +} - return val >= entry->min_field_value; +/* + * get_arm64_ftr_reg - Lookup a feature register entry using its + * sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id , we use binary search to find a matching + * entry. + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure. It is upto the caller to decide + * the impact of a failure. + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +{ + return bsearch((const void *)(unsigned long)sys_id, + arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + search_cmp_ftr_reg); +} + +static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +{ + u64 mask = arm64_ftr_mask(ftrp); + + reg &= ~mask; + reg |= (ftr_val << ftrp->shift) & mask; + return reg; +} + +static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +{ + s64 ret = 0; + + switch (ftrp->type) { + case FTR_EXACT: + ret = ftrp->safe_val; + break; + case FTR_LOWER_SAFE: + ret = new < cur ? new : cur; + break; + case FTR_HIGHER_SAFE: + ret = new > cur ? new : cur; + break; + default: + BUG(); + } + + return ret; +} + +static int __init sort_cmp_ftr_regs(const void *a, const void *b) +{ + return ((const struct arm64_ftr_reg *)a)->sys_id - + ((const struct arm64_ftr_reg *)b)->sys_id; +} + +static void __init swap_ftr_regs(void *a, void *b, int size) +{ + struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; + *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; + *(struct arm64_ftr_reg *)b = tmp; +} + +static void __init sort_ftr_regs(void) +{ + /* Keep the array sorted so that we can do the binary search */ + sort(arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + sort_cmp_ftr_regs, + swap_ftr_regs); +} + +/* + * Initialise the CPU feature register from Boot CPU values. + * Also initiliases the strict_mask for the register. + */ +static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) +{ + u64 val = 0; + u64 strict_mask = ~0x0ULL; + struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); + + BUG_ON(!reg); + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_new = arm64_ftr_value(ftrp, new); + + val = arm64_ftr_set_value(ftrp, val, ftr_new); + if (!ftrp->strict) + strict_mask &= ~arm64_ftr_mask(ftrp); + } + reg->sys_val = val; + reg->strict_mask = strict_mask; +} + +void __init init_cpu_features(struct cpuinfo_arm64 *info) +{ + /* Before we start using the tables, make sure it is sorted */ + sort_ftr_regs(); + + init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); + init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); + init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); + init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); + init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); + init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); + init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); + init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); +} + +static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) +{ + struct arm64_ftr_bits *ftrp; + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); + s64 ftr_new = arm64_ftr_value(ftrp, new); + + if (ftr_cur == ftr_new) + continue; + /* Find a safe value */ + ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur); + reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new); + } + +} + +static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + BUG_ON(!regp); + update_cpu_ftr_reg(regp, val); + if ((boot & regp->strict_mask) == (val & regp->strict_mask)) + return 0; + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n", + regp->name, boot, cpu, val); + return 1; +} + +/* + * Update system wide CPU feature registers with the values from a + * non-boot CPU. Also performs SANITY checks to make sure that there + * aren't any insane variations from that of the boot CPU. + */ +void update_cpu_features(int cpu, + struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu, + info->reg_ctr, boot->reg_ctr); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, + info->reg_dczid, boot->reg_dczid); + + /* If different, timekeeping will be broken (especially with KVM) */ + taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, + info->reg_cntfrq, boot->reg_cntfrq); + + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu, + info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu, + info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1); + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu, + info->reg_id_aa64isar0, boot->reg_id_aa64isar0); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, + info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu, + info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, + info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, + info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, + info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. + */ + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + info->reg_id_isar0, boot->reg_id_isar0); + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + info->reg_id_isar1, boot->reg_id_isar1); + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + info->reg_id_isar2, boot->reg_id_isar2); + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + info->reg_id_isar3, boot->reg_id_isar3); + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + info->reg_id_isar4, boot->reg_id_isar4); + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + info->reg_id_isar5, boot->reg_id_isar5); + + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + info->reg_id_mmfr0, boot->reg_id_mmfr0); + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + info->reg_id_mmfr1, boot->reg_id_mmfr1); + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + info->reg_id_mmfr2, boot->reg_id_mmfr2); + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + info->reg_id_pfr0, boot->reg_id_pfr0); + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + info->reg_mvfr0, boot->reg_mvfr0); + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + info->reg_mvfr1, boot->reg_mvfr1); + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + info->reg_mvfr2, boot->reg_mvfr2); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation.\n"); +} + +u64 read_system_reg(u32 id) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); + + /* We shouldn't get a request for an unsupported register */ + BUG_ON(!regp); + return regp->sys_val; } static bool -has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry) +feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - u64 val; + int val = cpuid_feature_extract_field(reg, entry->field_pos); - val = read_cpuid(id_aa64pfr0_el1); - return feature_matches(val, entry); + return val >= entry->min_field_value; } -static bool __maybe_unused -has_id_aa64mmfr1_feature(const struct arm64_cpu_capabilities *entry) +static bool +has_cpuid_feature(const struct arm64_cpu_capabilities *entry) { u64 val; - val = read_cpuid(id_aa64mmfr1_el1); + val = read_system_reg(entry->sys_reg); return feature_matches(val, entry); } @@ -53,45 +621,319 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .matches = has_id_aa64pfr0_feature, - .field_pos = 24, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_GIC_SHIFT, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .matches = has_id_aa64mmfr1_feature, - .field_pos = 20, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, .min_field_value = 1, .enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_UAO + { + .desc = "User Access Override", + .capability = ARM64_HAS_UAO, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .field_pos = ID_AA64MMFR2_UAO_SHIFT, + .min_field_value = 1, + .enable = cpu_enable_uao, + }, +#endif /* CONFIG_ARM64_UAO */ +#ifdef CONFIG_ARM64_PAN + { + .capability = ARM64_ALT_PAN_NOT_UAO, + .matches = cpufeature_pan_not_uao, + }, +#endif /* CONFIG_ARM64_PAN */ {}, }; -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +#define HWCAP_CAP(reg, field, min_value, type, cap) \ + { \ + .desc = #cap, \ + .matches = has_cpuid_feature, \ + .sys_reg = reg, \ + .field_pos = field, \ + .min_field_value = min_value, \ + .hwcap_type = type, \ + .hwcap = cap, \ + } + +static const struct arm64_cpu_capabilities arm64_hwcaps[] = { + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD), +#ifdef CONFIG_COMPAT + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), +#endif + {}, +}; + +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +{ + switch (cap->hwcap_type) { + case CAP_HWCAP: + elf_hwcap |= cap->hwcap; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + compat_elf_hwcap |= (u32)cap->hwcap; + break; + case CAP_COMPAT_HWCAP2: + compat_elf_hwcap2 |= (u32)cap->hwcap; + break; +#endif + default: + WARN_ON(1); + break; + } +} + +/* Check if we have a particular HWCAP enabled */ +static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +{ + bool rc; + + switch (cap->hwcap_type) { + case CAP_HWCAP: + rc = (elf_hwcap & cap->hwcap) != 0; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0; + break; + case CAP_COMPAT_HWCAP2: + rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0; + break; +#endif + default: + WARN_ON(1); + rc = false; + } + + return rc; +} + +static void __init setup_cpu_hwcaps(void) +{ + int i; + const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; + + for (i = 0; hwcaps[i].matches; i++) + if (hwcaps[i].matches(&hwcaps[i])) + cap_set_hwcap(&hwcaps[i]); +} + +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { int i; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!caps[i].matches(&caps[i])) continue; - if (!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) && caps[i].desc) pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } +} + +/* + * Run through the enabled capabilities and enable() it on all active + * CPUs + */ +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +{ + int i; + + for (i = 0; caps[i].matches; i++) + if (caps[i].enable && cpus_have_cap(caps[i].capability)) + on_each_cpu(caps[i].enable, NULL, true); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Flag to indicate if we have computed the system wide + * capabilities based on the boot time active CPUs. This + * will be used to determine if a new booting CPU should + * go through the verification process to make sure that it + * supports the system capabilities, without using a hotplug + * notifier. + */ +static bool sys_caps_initialised; + +static inline void set_sys_caps_initialised(void) +{ + sys_caps_initialised = true; +} - /* second pass allows enable() to consider interacting capabilities */ - for (i = 0; caps[i].desc; i++) { - if (cpus_have_cap(caps[i].capability) && caps[i].enable) - caps[i].enable(); +/* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return read_cpuid(SYS_ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(SYS_ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(SYS_ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(SYS_ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(SYS_ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(SYS_ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(SYS_ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(SYS_ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(SYS_ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(SYS_ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(SYS_ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(SYS_MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(SYS_MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(SYS_MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(SYS_ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(SYS_ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(SYS_ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(SYS_ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(SYS_ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(SYS_CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(SYS_CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(SYS_DCZID_EL0); + default: + BUG(); + return 0; } } -void check_local_cpu_features(void) +/* + * Park the CPU which doesn't have the capability as advertised + * by the system. + */ +static void fail_incapable_cpu(char *cap_type, + const struct arm64_cpu_capabilities *cap) +{ + int cpu = smp_processor_id(); + + pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc); + /* Mark this CPU absent */ + set_cpu_present(cpu, 0); + + /* Check if we can park ourselves */ + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); + asm( + "1: wfe\n" + " wfi\n" + " b 1b"); +} + +/* + * Run through the enabled system capabilities and enable() it on this CPU. + * The capabilities were decided based on the available CPUs at the boot time. + * Any new CPU should match the system wide status of the capability. If the + * new CPU doesn't have a capability which the system now has enabled, we + * cannot do anything to fix it up and could cause unexpected failures. So + * we park the CPU. + */ +void verify_local_cpu_capabilities(void) +{ + int i; + const struct arm64_cpu_capabilities *caps; + + /* + * If we haven't computed the system capabilities, there is nothing + * to verify. + */ + if (!sys_caps_initialised) + return; + + caps = arm64_features; + for (i = 0; caps[i].matches; i++) { + if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) + fail_incapable_cpu("arm64_features", &caps[i]); + if (caps[i].enable) + caps[i].enable(NULL); + } + + for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { + if (!cpus_have_hwcap(&caps[i])) + continue; + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) + fail_incapable_cpu("arm64_hwcaps", &caps[i]); + } +} + +#else /* !CONFIG_HOTPLUG_CPU */ + +static inline void set_sys_caps_initialised(void) +{ +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __init setup_feature_capabilities(void) +{ + update_cpu_capabilities(arm64_features, "detected feature:"); + enable_cpu_capabilities(arm64_features); +} + +void __init setup_cpu_features(void) +{ + u32 cwg; + int cls; + + /* Set the CPU feature capabilies */ + setup_feature_capabilities(); + setup_cpu_hwcaps(); + + /* Advertise that we have computed the system capabilities */ + set_sys_caps_initialised(); + + /* + * Check for sane CTR_EL0.CWG value. + */ + cwg = cache_type_cwg(); + cls = cache_line_size(); + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); +} + +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) { - check_cpu_capabilities(arm64_features, "detected feature"); + return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index faf5cadbd391..ac60eae5d8d6 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -22,10 +22,14 @@ #include <linux/bitops.h> #include <linux/bug.h> +#include <linux/delay.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/personality.h> #include <linux/preempt.h> #include <linux/printk.h> +#include <linux/seq_file.h> +#include <linux/sched.h> #include <linux/smp.h> /* @@ -35,7 +39,6 @@ */ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static bool mixed_endian_el0 = true; static char *icache_policy_str[] = { [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", @@ -46,179 +49,200 @@ static char *icache_policy_str[] = { unsigned long __icache_flags; -static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) +static const char *hwcap_str[] = { + "fp", + "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; + +#ifdef CONFIG_COMPAT +static const char *compat_hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + "java", + "iwmmxt", + "crunch", + "thumbee", + "neon", + "vfpv3", + "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm" +}; + +static const char *compat_hwcap2_str[] = { + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; +#endif /* CONFIG_COMPAT */ + +static int c_show(struct seq_file *m, void *v) { - unsigned int cpu = smp_processor_id(); - u32 l1ip = CTR_L1IP(info->reg_ctr); + int i, j; + + for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; - if (l1ip != ICACHE_POLICY_PIPT) { /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); +#ifdef CONFIG_SMP + seq_printf(m, "processor\t: %d\n", i); +#endif - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + + /* + * Dump out the common processor features in a single line. + * Userspace should read the hwcaps with getauxval(AT_HWCAP) + * rather than attempting to parse this, but there's a body of + * software which does already (at least for 32-bit). + */ + seq_puts(m, "Features\t:"); + if (personality(current->personality) == PER_LINUX32) { +#ifdef CONFIG_COMPAT + for (j = 0; compat_hwcap_str[j]; j++) + if (compat_elf_hwcap & (1 << j)) + seq_printf(m, " %s", compat_hwcap_str[j]); + + for (j = 0; compat_hwcap2_str[j]; j++) + if (compat_elf_hwcap2 & (1 << j)) + seq_printf(m, " %s", compat_hwcap2_str[j]); +#endif /* CONFIG_COMPAT */ + } else { + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, " %s", hwcap_str[j]); + } + seq_puts(m, "\n"); + + seq_printf(m, "CPU implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); - pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); + return 0; } -bool cpu_supports_mixed_endian_el0(void) +static void *c_start(struct seq_file *m, loff_t *pos) { - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); + return *pos < 1 ? (void *)1 : NULL; } -bool system_supports_mixed_endian_el0(void) +static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - return mixed_endian_el0; + ++*pos; + return NULL; } -static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) +static void c_stop(struct seq_file *m, void *v) { - mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); } -static void update_cpu_features(struct cpuinfo_arm64 *info) -{ - update_mixed_endian_el0_support(info); -} +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; -static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) +static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { - if ((boot & mask) == (cur & mask)) - return 0; - - pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", - name, (unsigned long)boot, cpu, (unsigned long)cur); - - return 1; -} + unsigned int cpu = smp_processor_id(); + u32 l1ip = CTR_L1IP(info->reg_ctr); -#define CHECK_MASK(field, mask, boot, cur, cpu) \ - check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) + if (l1ip != ICACHE_POLICY_PIPT) { + /* + * VIPT caches are non-aliasing if the VA always equals the PA + * in all bit positions that are covered by the index. This is + * the case if the size of a way (# of sets * line size) does + * not exceed PAGE_SIZE. + */ + u32 waysize = icache_get_numsets() * icache_get_linesize(); -#define CHECK(field, boot, cur, cpu) \ - CHECK_MASK(field, ~0ULL, boot, cur, cpu) + if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) + set_bit(ICACHEF_ALIASING, &__icache_flags); + } + if (l1ip == ICACHE_POLICY_AIVIVT) + set_bit(ICACHEF_AIVIVT, &__icache_flags); -/* - * Verify that CPUs don't have unexpected differences that will cause problems. - */ -static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) -{ - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arm64 *boot = &boot_cpu_data; - unsigned int diff = 0; - - /* - * The kernel can handle differing I-cache policies, but otherwise - * caches should look identical. Userspace JITs will make use of - * *minLine. - */ - diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); - - /* - * Userspace may perform DC ZVA instructions. Mismatched block sizes - * could result in too much or too little memory being zeroed if a - * process is preempted and migrated between CPUs. - */ - diff |= CHECK(dczid, boot, cur, cpu); - - /* If different, timekeeping will be broken (especially with KVM) */ - diff |= CHECK(cntfrq, boot, cur, cpu); - - /* - * Even in big.LITTLE, processors should be identical instruction-set - * wise. - */ - diff |= CHECK(id_aa64isar0, boot, cur, cpu); - diff |= CHECK(id_aa64isar1, boot, cur, cpu); - - /* - * Differing PARange support is fine as long as all peripherals and - * memory are mapped within the minimum PARange of all CPUs. - * Linux should not care about secure memory. - * ID_AA64MMFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); - diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); - - /* - * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); - diff |= CHECK(id_aa64pfr1, boot, cur, cpu); - - /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. - */ - diff |= CHECK(id_isar0, boot, cur, cpu); - diff |= CHECK(id_isar1, boot, cur, cpu); - diff |= CHECK(id_isar2, boot, cur, cpu); - diff |= CHECK(id_isar3, boot, cur, cpu); - diff |= CHECK(id_isar4, boot, cur, cpu); - diff |= CHECK(id_isar5, boot, cur, cpu); - diff |= CHECK(id_mmfr0, boot, cur, cpu); - diff |= CHECK(id_mmfr1, boot, cur, cpu); - diff |= CHECK(id_mmfr2, boot, cur, cpu); - diff |= CHECK(id_mmfr3, boot, cur, cpu); - diff |= CHECK(id_pfr0, boot, cur, cpu); - diff |= CHECK(id_pfr1, boot, cur, cpu); - - /* - * Mismatched CPU features are a recipe for disaster. Don't even - * pretend to support them. - */ - WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation."); + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); info->reg_ctr = read_cpuid_cachetype(); - info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_dczid = read_cpuid(SYS_DCZID_EL0); info->reg_midr = read_cpuid_id(); - info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); - info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); - info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); - info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); - info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); - info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); - - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_id_aa64dfr0 = read_cpuid(SYS_ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1); + info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1); + info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1); + + info->reg_id_dfr0 = read_cpuid(SYS_ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(SYS_ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(SYS_ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(SYS_ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(SYS_ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(SYS_ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(SYS_ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(SYS_ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(SYS_ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(SYS_ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(SYS_ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(SYS_ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(SYS_ID_PFR1_EL1); + + info->reg_mvfr0 = read_cpuid(SYS_MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(SYS_MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(SYS_MVFR2_EL1); cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); - check_local_cpu_features(); - update_cpu_features(info); } void cpuinfo_store_cpu(void) { struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); __cpuinfo_store_cpu(info); - cpuinfo_sanity_check(info); + update_cpu_features(smp_processor_id(), info, &boot_cpu_data); } void __init cpuinfo_store_boot_cpu(void) @@ -227,6 +251,7 @@ void __init cpuinfo_store_boot_cpu(void) __cpuinfo_store_cpu(info); boot_cpu_data = *info; + init_cpu_features(&boot_cpu_data); } u64 __attribute_const__ icache_get_ccsidr(void) diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c deleted file mode 100644 index fd3993cb060f..000000000000 --- a/arch/arm64/kernel/cputable.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/arm64/kernel/cputable.c - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> - -#include <asm/cputable.h> - -extern unsigned long __cpu_setup(void); - -struct cpu_info cpu_table[] = { - { - .cpu_id_val = 0x000f0000, - .cpu_id_mask = 0x000f0000, - .cpu_name = "AArch64 Processor", - .cpu_setup = __cpu_setup, - }, - { /* Empty */ }, -}; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 62c91b3b42e8..d35057c09297 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,14 +26,16 @@ #include <linux/stat.h> #include <linux/uaccess.h> -#include <asm/debug-monitors.h> +#include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/debug-monitors.h> #include <asm/system_misc.h> /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return read_cpuid(ID_AA64DFR0_EL1) & 0xf; + return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_DEBUGVER_SHIFT); } /* @@ -134,7 +136,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index d18a44940968..8ce9b0577442 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,8 @@ ENTRY(efi_stub_entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - mov x21, x0 + ldr x21, =stext_offset + add x21, x0, x21 /* * Calculate size of the kernel Image (same for original and copy). diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 1d85a7c5a850..7d13642092d1 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,26 +11,45 @@ * */ +#include <linux/atomic.h> +#include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/mm_types.h> #include <linux/bootmem.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/preempt.h> +#include <linux/rbtree.h> +#include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <asm/cacheflush.h> #include <asm/efi.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> struct efi_memory_map memmap; -static efi_runtime_services_t *runtime; - static u64 efi_system_table; +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), +}; + static int uefi_debug __initdata; static int __init uefi_debug_setup(char *str) { @@ -47,30 +66,33 @@ static int __init is_normal_ram(efi_memory_desc_t *md) return 0; } -static void __init efi_setup_idmap(void) +/* + * Translate a EFI virtual address into a physical address: this is necessary, + * as some data members of the EFI system table are virtually remapped after + * SetVirtualAddressMap() has been called. + */ +static phys_addr_t efi_to_phys(unsigned long addr) { - struct memblock_region *r; efi_memory_desc_t *md; - u64 paddr, npages, size; - - for_each_memblock(memory, r) - create_id_mapping(r->base, r->size, 0); - /* map runtime io spaces */ for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - create_id_mapping(paddr, size, 1); + if (md->virt_addr == 0) + /* no virtual mapping has been installed by the stub */ + break; + if (md->virt_addr <= addr && + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) + return md->phys_addr + addr - md->virt_addr; } + return addr; } static int __init uefi_init(void) { efi_char16_t *c16; + void *config_tables; + u64 table_size; char vendor[100] = "unknown"; int i, retval; @@ -98,7 +120,7 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi.systab->fw_vendor, + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), sizeof(vendor)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) @@ -111,10 +133,14 @@ static int __init uefi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - retval = efi_config_init(NULL); - if (retval == 0) - set_bit(EFI_CONFIG_TABLES, &efi.flags); + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; + config_tables = early_memremap(efi_to_phys(efi.systab->tables), + table_size); + + retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, + sizeof(efi_config_table_64_t), NULL); + early_memunmap(config_tables, table_size); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); return retval; @@ -125,17 +151,17 @@ out: */ static __init int is_reserve_region(efi_memory_desc_t *md) { - if (!is_normal_ram(md)) + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: return 0; - - if (md->attribute & EFI_MEMORY_RUNTIME) - return 1; - - if (md->type == EFI_ACPI_RECLAIM_MEMORY || - md->type == EFI_RESERVED_TYPE) - return 1; - - return 0; + default: + break; + } + return is_normal_ram(md); } static __init void reserve_regions(void) @@ -164,9 +190,7 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md) || - md->type == EFI_BOOT_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_DATA) { + if (is_reserve_region(md)) { memblock_reserve(paddr, size); if (uefi_debug) pr_cont("*"); @@ -179,123 +203,6 @@ static __init void reserve_regions(void) set_bit(EFI_MEMMAP, &efi.flags); } - -static u64 __init free_one_region(u64 start, u64 end) -{ - u64 size = end - start; - - if (uefi_debug) - pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); - - free_bootmem_late(start, size); - return size; -} - -static u64 __init free_region(u64 start, u64 end) -{ - u64 map_start, map_end, total = 0; - - if (end <= start) - return total; - - map_start = (u64)memmap.phys_map; - map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); - map_start &= PAGE_MASK; - - if (start < map_end && end > map_start) { - /* region overlaps UEFI memmap */ - if (start < map_start) - total += free_one_region(start, map_start); - - if (map_end < end) - total += free_one_region(map_end, end); - } else - total += free_one_region(start, end); - - return total; -} - -static void __init free_boot_services(void) -{ - u64 total_freed = 0; - u64 keep_end, free_start, free_end; - efi_memory_desc_t *md; - - /* - * If kernel uses larger pages than UEFI, we have to be careful - * not to inadvertantly free memory we want to keep if there is - * overlap at the kernel page size alignment. We do not want to - * free is_reserve_region() memory nor the UEFI memmap itself. - * - * The memory map is sorted, so we keep track of the end of - * any previous region we want to keep, remember any region - * we want to free and defer freeing it until we encounter - * the next region we want to keep. This way, before freeing - * it, we can clip it as needed to avoid freeing memory we - * want to keep for UEFI. - */ - - keep_end = 0; - free_start = 0; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - - if (is_reserve_region(md)) { - /* - * We don't want to free any memory from this region. - */ - if (free_start) { - /* adjust free_end then free region */ - if (free_end > md->phys_addr) - free_end -= PAGE_SIZE; - total_freed += free_region(free_start, free_end); - free_start = 0; - } - keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - continue; - } - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) { - /* no need to free this region */ - continue; - } - - /* - * We want to free memory from this region. - */ - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (free_start) { - if (paddr <= free_end) - free_end = paddr + size; - else { - total_freed += free_region(free_start, free_end); - free_start = paddr; - free_end = paddr + size; - } - } else { - free_start = paddr; - free_end = paddr + size; - } - if (free_start < keep_end) { - free_start += PAGE_SIZE; - if (free_start >= free_end) - free_start = 0; - } - } - if (free_start) - total_freed += free_region(free_start, free_end); - - if (total_freed) - pr_info("Freed 0x%llx bytes of EFI boot services memory", - total_freed); -} - void __init efi_init(void) { struct efi_fdt_params params; @@ -318,156 +225,154 @@ void __init efi_init(void) return; reserve_regions(); + early_memunmap(memmap.map, params.mmap_size); } -void __init efi_idmap_init(void) +static bool __init efi_virtmap_init(void) { - if (!efi_enabled(EFI_BOOT)) - return; - - /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ - efi_setup_idmap(); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} - -static int __init remap_region(efi_memory_desc_t *md, void **new) -{ - u64 paddr, vaddr, npages, size; + efi_memory_desc_t *md; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; + init_new_context(NULL, &efi_mm); - if (is_normal_ram(md)) - vaddr = (__force u64)ioremap_cache(paddr, size); - else - vaddr = (__force u64)ioremap(paddr, size); + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; - if (!vaddr) { - pr_err("Unable to remap 0x%llx pages @ %p\n", - npages, (void *)paddr); - return 0; - } + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) + return false; - /* adjust for any rounding when EFI and system pagesize differs */ - md->virt_addr = vaddr + (md->phys_addr - paddr); + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; - if (uefi_debug) - pr_info(" EFI remap 0x%012llx => %p\n", + pr_info(" EFI remap 0x%016llx => %p\n", md->phys_addr, (void *)md->virt_addr); - memcpy(*new, md, memmap.desc_size); - *new += memmap.desc_size; - - return 1; + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, + __pgprot(pgprot_val(prot) | PTE_NG)); + } + return true; } /* - * Switch UEFI from an identity map to a kernel virtual map + * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., + * non-early mapping of the UEFI system table and virtual mappings for all + * EFI_MEMORY_RUNTIME regions. */ -static int __init arm64_enter_virtual_mode(void) +static int __init arm64_enable_runtime_services(void) { - efi_memory_desc_t *md; - phys_addr_t virtmap_phys; - void *virtmap, *virt_md; - efi_status_t status; u64 mapsize; - int count = 0; - unsigned long flags; if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return -1; } - mapsize = memmap.map_end - memmap.map; - if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return -1; } pr_info("Remapping and enabling EFI services.\n"); - /* replace early memmap mapping with permanent mapping */ + + mapsize = memmap.map_end - memmap.map; memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, mapsize); - memmap.map_end = memmap.map + mapsize; - - efi.memmap = &memmap; - - /* Map the runtime regions */ - virtmap = kmalloc(mapsize, GFP_KERNEL); - if (!virtmap) { - pr_err("Failed to allocate EFI virtual memmap\n"); + if (!memmap.map) { + pr_err("Failed to remap EFI memory map\n"); return -1; } - virtmap_phys = virt_to_phys(virtmap); - virt_md = virtmap; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (!remap_region(md, &virt_md)) - goto err_unmap; - ++count; - } + memmap.map_end = memmap.map + mapsize; + efi.memmap = &memmap; - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + efi.systab = (__force void *)ioremap_cache(efi_system_table, + sizeof(efi_system_table_t)); if (!efi.systab) { - /* - * If we have no virtual mapping for the System Table at this - * point, the memory map doesn't cover the physical offset where - * it resides. This means the System Table will be inaccessible - * to Runtime Services themselves once the virtual mapping is - * installed. - */ - pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); - goto err_unmap; + pr_err("Failed to remap EFI System Table\n"); + return -1; } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - local_irq_save(flags); - cpu_switch_mm(idmap_pg_dir, &init_mm); - - /* Call SetVirtualAddressMap with the physical address of the map */ - runtime = efi.systab->runtime; - efi.set_virtual_address_map = runtime->set_virtual_address_map; - - status = efi.set_virtual_address_map(count * memmap.desc_size, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)virtmap_phys); - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - local_irq_restore(flags); - - kfree(virtmap); - - free_boot_services(); - - if (status != EFI_SUCCESS) { - pr_err("Failed to set EFI virtual address map! [%lx]\n", - status); + if (!efi_virtmap_init()) { + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; } /* Set up runtime services function pointers */ - runtime = efi.systab->runtime; efi_native_runtime_setup(); set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi.runtime_version = efi.systab->hdr.revision; return 0; +} +early_initcall(arm64_enable_runtime_services); -err_unmap: - /* unmap all mappings that succeeded: there are 'count' of those */ - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { - md = virt_md; - iounmap((__force void __iomem *)md->virt_addr); +static int __init arm64_dmi_init(void) +{ + /* + * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to + * be called early because dmi_id_init(), which is an arch_initcall + * itself, depends on dmi_scan_machine() having been called already. + */ + dmi_scan_machine(); + if (dmi_available) + dmi_set_dump_stack_arch_desc(); + return 0; +} +core_initcall(arm64_dmi_init); + +static void efi_set_pgd(struct mm_struct *mm) +{ + __switch_mm(mm); + + if (system_uses_ttbr0_pan()) { + if (mm != current->active_mm) { + /* + * Update the current thread's saved ttbr0 since it is + * restored as part of a return from exception. Set + * the hardware TTBR0_EL1 using cpu_switch_mm() + * directly to enable potential errata workarounds. + */ + update_saved_ttbr0(current, mm); + cpu_switch_mm(mm->pgd, mm); + } else { + /* + * Defer the switch to the current thread's TTBR0_EL1 + * until uaccess_enable(). Restore the current + * thread's saved ttbr0 corresponding to its active_mm + * (if different from init_mm). + */ + cpu_set_reserved_ttbr0(); + if (current->active_mm != &init_mm) + update_saved_ttbr0(current, current->active_mm); + } } - kfree(virtmap); - return -1; } -early_initcall(arm64_enter_virtual_mode); + +void efi_virtmap_load(void) +{ + preempt_disable(); + efi_set_pgd(&efi_mm); +} + +void efi_virtmap_unload(void) +{ + efi_set_pgd(current->active_mm); + preempt_enable(); +} diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 075e42e74cc9..d16c670e83ff 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,7 +27,10 @@ #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/memory.h> +#include <asm/ptrace.h> #include <asm/thread_info.h> +#include <asm/uaccess.h> #include <asm/asm-uaccess.h> #include <asm/unistd.h> @@ -67,36 +70,71 @@ #define BAD_ERROR 3 .macro kernel_entry, el, regsize = 64 - sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR + sub sp, sp, #S_FRAME_SIZE .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif - push x28, x29 - push x26, x27 - push x24, x25 - push x22, x23 - push x20, x21 - push x18, x19 - push x16, x17 - push x14, x15 - push x12, x13 - push x10, x11 - push x8, x9 - push x6, x7 - push x4, x5 - push x2, x3 - push x0, x1 + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Set the TTBR0 PAN bit in SPSR. When the exception is taken from + * EL0, there is no need to check the state of TTBR0_EL1 since + * accesses are always enabled. + * Note that the meaning of this bit differs from the ARMv8.1 PAN + * feature as all TTBR0_EL1 accesses are disabled, not just those to + * user mappings. + */ +alternative_if ARM64_HAS_PAN + b 1f // skip TTBR0 PAN +alternative_else_nop_endif + + .if \el != 0 + mrs x21, ttbr0_el1 + tst x21, #0xffff << 48 // Check for the reserved ASID + orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR + b.eq 1f // TTBR0 access already disabled + and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR + .endif + + __uaccess_ttbr0_disable x21 +1: +#endif + stp x22, x23, [sp, #S_PC] /* @@ -108,6 +146,13 @@ .endif /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + + /* * Registers that may be useful after this macro is invoked: * * x21 - aborted SP @@ -116,64 +161,86 @@ */ .endm - .macro kernel_exit, el, ret = 0 + .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter - ldr x23, [sp, #S_SP] // load return stack pointer + .endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR + * PAN bit checking. + */ +alternative_if ARM64_HAS_PAN + b 2f // skip TTBR0 PAN +alternative_else_nop_endif + + .if \el != 0 + tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set + .endif + __uaccess_ttbr0_enable x0 +1: + .if \el != 0 + and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit + .endif +2: +#endif + + .if \el == 0 + ldr x23, [sp, #S_SP] // load return stack pointer + msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 - alternative_insn \ - "nop", \ - "tbz x22, #4, 1f", \ - ARM64_WORKAROUND_845719 +alternative_if_not ARM64_WORKAROUND_845719 + nop + nop #ifdef CONFIG_PID_IN_CONTEXTIDR - alternative_insn \ - "nop; nop", \ - "mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:", \ - ARM64_WORKAROUND_845719 + nop +#endif +alternative_else + tbz x22, #4, 1f +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrs x29, contextidr_el1 + msr contextidr_el1, x29 #else - alternative_insn \ - "nop", \ - "msr contextidr_el1, xzr; 1:", \ - ARM64_WORKAROUND_845719 + msr contextidr_el1, xzr #endif +1: +alternative_endif #endif .endif - .if \ret - ldr x1, [sp, #S_X1] // preserve x0 (syscall return) - add sp, sp, S_X2 - .else - pop x0, x1 - .endif - pop x2, x3 // load the rest of the registers - pop x4, x5 - pop x6, x7 - pop x8, x9 + msr elr_el1, x21 // set up the return data msr spsr_el1, x22 - .if \el == 0 - msr sp_el0, x23 - .endif - pop x10, x11 - pop x12, x13 - pop x14, x15 - pop x16, x17 - pop x18, x19 - pop x20, x21 - pop x22, x23 - pop x24, x25 - pop x26, x27 - pop x28, x29 - ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x19, [sp, #16 * 9] + ldp x20, x21, [sp, #16 * 10] + ldp x22, x23, [sp, #16 * 11] + ldp x24, x25, [sp, #16 * 12] + ldp x26, x27, [sp, #16 * 13] + ldp x28, x29, [sp, #16 * 14] + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp eret // return to kernel .endm - .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack - .endm - /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - x0 to x6. @@ -189,7 +256,8 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - ldr x1, handle_arch_irq + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] mov x0, sp blr x1 .endm @@ -234,7 +302,7 @@ END(vectors) * Invalid mode handlers */ .macro inv_entry, el, reason, regsize = 64 - kernel_entry el, \regsize + kernel_entry \el, \regsize mov x0, sp mov x1, #\reason mrs x2, esr_el1 @@ -290,20 +358,27 @@ ENDPROC(el1_error_invalid) el1_sync: kernel_entry 1 mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_DABT_EL1 // data abort in EL1 + lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 + b.eq el1_ia + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL1 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL1 // debug exception in EL1 + cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv + +el1_ia: + /* + * Fall through to the Data abort case + */ el1_da: /* * Data abort handling @@ -340,7 +415,7 @@ el1_dbg: /* * Debug exception handling */ - cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 @@ -397,26 +472,26 @@ el1_preempt: el0_sync: kernel_entry 0 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv @@ -425,30 +500,30 @@ el0_sync: el0_sync_compat: kernel_entry 0, 32 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_32 // CP15 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_64 // CP15 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_MR // CP14 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_LS // CP14 LDC/STC trap + cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_64 // CP14 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv el0_svc_compat: @@ -477,8 +552,8 @@ el0_da: clear_address_tag x0, x26 mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_ia: /* * Instruction abort handling @@ -488,10 +563,10 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access @@ -500,8 +575,8 @@ el0_fpsimd_acc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_acc + bl do_fpsimd_acc + b ret_to_user el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception @@ -510,8 +585,8 @@ el0_fpsimd_exc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_exc + bl do_fpsimd_exc + b ret_to_user el0_sp_pc: /* * Stack or PC alignment exception handling @@ -523,8 +598,8 @@ el0_sp_pc: mov x0, x26 mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_sp_pc_abort + bl do_sp_pc_abort + b ret_to_user el0_undef: /* * Undefined instruction @@ -533,8 +608,8 @@ el0_undef: enable_dbg_and_irq ct_user_exit mov x0, sp - adr lr, ret_to_user - b do_undefinstr + bl do_undefinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -602,6 +677,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) @@ -611,17 +688,21 @@ ENDPROC(cpu_switch_to) */ ret_fast_syscall: disable_irq // disable interrupts - ldr x1, [tsk, #TI_FLAGS] + str x0, [sp, #S_X0] // returned x0 + ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing + and x2, x1, #_TIF_SYSCALL_WORK + cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK - cbnz x2, fast_work_pending + cbnz x2, work_pending enable_step_tsk x1, x2 - kernel_exit 0, ret = 1 + kernel_exit 0 +ret_fast_syscall_trace: + enable_irq // enable interrupts + b __sys_trace_return_skipped // we already saved x0 /* * Ok, we need to do extra processing, enter the slow path. */ -fast_work_pending: - str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ @@ -645,7 +726,7 @@ ret_to_user: cbnz x2, work_pending enable_step_tsk x1, x2 no_work_pending: - kernel_exit 0, ret = 0 + kernel_exit 0 ENDPROC(ret_to_user) /* @@ -676,14 +757,15 @@ el0_svc_naked: // compat entry point ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine + b ret_fast_syscall ni_sys: mov x0, sp - b do_ni_syscall + bl do_ni_syscall + b ret_fast_syscall ENDPROC(el0_svc) /* @@ -691,26 +773,38 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x0, sp + mov w0, #-1 // set default errno for + cmp scno, x0 // user-issued syscall(-1) + b.ne 1f + mov x0, #-ENOSYS + str x0, [sp, #S_X0] +1: mov x0, sp bl syscall_trace_enter - adr lr, __sys_trace_return // return address + cmp w0, #-1 // skip the syscall? + b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit - b.hs ni_sys + b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine __sys_trace_return: - str x0, [sp] // save returned x0 + str x0, [sp, #S_X0] // save returned x0 +__sys_trace_return_skipped: mov x0, sp bl syscall_trace_exit b ret_to_user +__ni_sys_trace: + mov x0, sp + bl do_ni_syscall + b __sys_trace_return + /* * Special system call wrappers. */ @@ -718,6 +812,3 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) - -ENTRY(handle_arch_irq) - .quad 0 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 7b4e9ea0b1a4..f995dae1c8fd 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cpu.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/init.h> @@ -290,7 +291,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } @@ -299,25 +300,49 @@ static void fpsimd_pm_init(void) static inline void fpsimd_pm_init(void) { } #endif /* CONFIG_CPU_PM */ +#ifdef CONFIG_HOTPLUG_CPU +static int fpsimd_cpu_hotplug_notifier(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + per_cpu(fpsimd_last_state, cpu) = NULL; + break; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_hotplug_notifier_block = { + .notifier_call = fpsimd_cpu_hotplug_notifier, +}; + +static inline void fpsimd_hotplug_init(void) +{ + register_cpu_notifier(&fpsimd_cpu_hotplug_notifier_block); +} + +#else +static inline void fpsimd_hotplug_init(void) { } +#endif + /* * FP/SIMD support code initialisation. */ static int __init fpsimd_init(void) { - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - - if (pfr & (0xf << 16)) { + if (elf_hwcap & HWCAP_FP) { + fpsimd_pm_init(); + fpsimd_hotplug_init(); + } else { pr_notice("Floating-point is not implemented\n"); - return 0; } - elf_hwcap |= HWCAP_FP; - if (pfr & (0xf << 20)) + if (!(elf_hwcap & HWCAP_ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); - else - elf_hwcap |= HWCAP_ASIMD; - - fpsimd_pm_init(); return 0; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4f9a2b83dc8e..a00093a34d46 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,6 +29,7 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> #include <asm/thread_info.h> #include <asm/pgtable-hwdef.h> @@ -36,7 +37,7 @@ #include <asm/page.h> #include <asm/virt.h> -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) +#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 #error TEXT_OFFSET must be at least 4KB aligned @@ -46,44 +47,10 @@ #error TEXT_OFFSET must be less than 2MB #endif - .macro pgtbl, ttb0, ttb1, virt_to_phys - ldr \ttb1, =swapper_pg_dir - ldr \ttb0, =idmap_pg_dir - add \ttb1, \ttb1, \virt_to_phys - add \ttb0, \ttb0, \virt_to_phys - .endm - -#ifdef CONFIG_ARM64_64K_PAGES -#define BLOCK_SHIFT PAGE_SHIFT -#define BLOCK_SIZE PAGE_SIZE -#define TABLE_SHIFT PMD_SHIFT -#else -#define BLOCK_SHIFT SECTION_SHIFT -#define BLOCK_SIZE SECTION_SIZE -#define TABLE_SHIFT PUD_SHIFT -#endif - -#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_START _text #define KERNEL_END _end /* - * Initial memory map attributes. - */ -#ifndef CONFIG_SMP -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF -#else -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S -#endif - -#ifdef CONFIG_ARM64_64K_PAGES -#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#else -#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#endif - -/* * Kernel startup entry point. * --------------------------- * @@ -132,6 +99,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,12 +124,12 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment - .long 0x8 // FileAlignment + .long 0x1000 // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion @@ -172,7 +141,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,53 +186,64 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) - mov x21, x0 // x21=FDT + bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - /* - * __error_p may end up out of range for cbz if text areas are - * aligned up to section sizes. - */ - cbnz x23, 1f // invalid processor (x23=0)? - b __error_p -1: + bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm64/mm/proc.S for details. x23 = base of - * cpu_info structure selected by lookup_processor_type above. + * The following calls CPU setup code, see arch/arm64/mm/proc.S for + * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, __switch_data // address to jump to after + ldr x27, =__mmap_switched // address to jump to after // MMU has been enabled - adrp lr, __enable_mmu // return (PIC) address - add lr, lr, #:lo12:__enable_mmu - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - br x12 // initialise processor + adr_l lr, __enable_mmu // return (PIC) address + b __cpu_setup // initialise processor ENDPROC(stext) /* + * Preserve the arguments passed by the bootloader in x0 .. x3 + */ +preserve_boot_args: + mov x21, x0 // x21=FDT + + adr_l x0, boot_args // record the contents of + stp x21, x1, [x0] // x0 .. x3 at kernel entry + stp x2, x3, [x0, #16] + + dmb sy // needed before dc ivac with + // MMU off + + add x1, x0, #0x20 // 4 x 8 bytes + b __inval_cache_range // tail call +ENDPROC(preserve_boot_args) + +/* * Determine validity of the x21 FDT pointer. * The dtb must be 8-byte aligned and live in the first 512M of memory. */ @@ -312,7 +292,7 @@ ENDPROC(__vet_fdt) .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm @@ -324,15 +304,15 @@ ENDPROC(__vet_fdt) * Corrupts: phys, start, end, pstate */ .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT + lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT + orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index 9999: str \phys, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block + add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end b.ls 9999b .endm @@ -346,7 +326,8 @@ ENDPROC(__vet_fdt) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir mov x27, lr /* @@ -354,14 +335,14 @@ __create_page_tables: * dirty cache lines being evicted. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + add x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -369,18 +350,56 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =MM_MMUFLAGS + ldr x7, =SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. */ mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) + adrp x3, KERNEL_START // __pa(KERNEL_START) + +#ifndef CONFIG_ARM64_VA_BITS_48 +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be + * created that covers system RAM if that is located sufficiently high + * in the physical address space. So for the ID map, use an extended + * virtual range in that case, by configuring an additional translation + * level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" +#endif + + /* + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of KERNEL_END. + */ + adrp x5, KERNEL_END + clz x5, x5 + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + b.ge 1f // .. then skip additional level + + adr_l x6, idmap_t0sz + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 +1: +#endif + create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) + adr_l x6, KERNEL_END // __pa(KERNEL_END) create_block_map x0, x7, x3, x5, x6 /* @@ -389,7 +408,7 @@ __create_page_tables: mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END + ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -415,7 +434,8 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + dmb sy bl __inval_cache_range mov lr, x27 @@ -423,37 +443,25 @@ __create_page_tables: ENDPROC(__create_page_tables) .ltorg - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - /* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. + * The following fragment of code is executed with the MMU enabled. */ + .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + dsb ishst // Make zero page visible to PTW + + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info + str_l x21, __fdt_pointer, x5 // Save FDT pointer + str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init @@ -547,7 +555,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ @@ -564,8 +573,7 @@ ENDPROC(el2_setup) * in x20. See arch/arm64/include/asm/virt.h for more info. */ ENTRY(set_cpu_boot_mode_flag) - ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode - add x1, x1, x28 + adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 @@ -589,7 +597,6 @@ ENTRY(__boot_cpu_mode) .long 0 .popsection -#ifdef CONFIG_SMP .align 3 1: .quad . .quad secondary_holding_pen_release @@ -600,15 +607,11 @@ ENTRY(__boot_cpu_mode) */ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 - adr x1, 1b - ldp x2, x3, [x1] - sub x1, x1, x2 - add x3, x3, x1 + adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] cmp x4, x0 b.eq secondary_startup @@ -622,7 +625,6 @@ ENDPROC(secondary_holding_pen) */ ENTRY(secondary_entry) bl el2_setup // Drop to EL1 - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry) @@ -631,16 +633,9 @@ ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. */ - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? - - pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - blr x12 // initialise processor + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + bl __cpu_setup // initialise processor ldr x21, =secondary_data ldr x27, =__secondary_switched // address to jump to after enabling the MMU @@ -650,17 +645,19 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) -#endif /* CONFIG_SMP */ /* - * Setup common bits before finally enabling the MMU. Essentially this is just - * loading the page table pointer and vector base registers. + * Enable the MMU. * - * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on - * the MMU. + * x0 = SCTLR_EL1 value for turning on the MMU. + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion */ __enable_mmu: ldr x5, =vectors @@ -668,89 +665,15 @@ __enable_mmu: msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible memory - * space. You will not be able to trace execution through this. - * - * x0 = system control register - * x27 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - * - * We align the entire function to the smallest power of two larger than it to - * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET - * close to the end of a 512MB or 1GB block we might require an additional - * table to map the entire function. - */ - .align 4 -__turn_mmu_on: msr sctlr_el1, x0 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x27 -ENDPROC(__turn_mmu_on) - -/* - * Calculate the start of physical memory. - */ -__calc_phys_offset: - adr x0, 1f - ldp x1, x2, [x0] - sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET - add x24, x2, x28 // x24 = PHYS_OFFSET - ret -ENDPROC(__calc_phys_offset) - - .align 3 -1: .quad . - .quad PAGE_OFFSET - -/* - * Exception handling. Something went wrong and we can't proceed. We ought to - * tell the user, but since we don't have any guarantee that we're even - * running on the right architecture, we do virtually nothing. - */ -__error_p: -ENDPROC(__error_p) - -__error: -1: nop - b 1b -ENDPROC(__error) - -/* - * This function gets the processor ID in w0 and searches the cpu_table[] for - * a match. It returns a pointer to the struct cpu_info it found. The - * cpu_table[] must end with an empty (all zeros) structure. - * - * This routine can be called via C code and it needs to work with the MMU - * both disabled and enabled (the offset is calculated automatically). - */ -ENTRY(lookup_processor_type) - adr x1, __lookup_processor_type_data - ldp x2, x3, [x1] - sub x1, x1, x2 // get offset between VA and PA - add x3, x3, x1 // convert VA to PA -1: - ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask - cbz w5, 2f // end of list? - and w6, w6, w0 - cmp w5, w6 - b.eq 3f - add x3, x3, #CPU_INFO_SZ - b 1b -2: - mov x3, #0 // unknown processor -3: - mov x0, x3 - ret -ENDPROC(lookup_processor_type) - - .align 3 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .quad . - .quad cpu_table - .size __lookup_processor_type_data, . - __lookup_processor_type_data +ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 8e20edfc5d13..b2a5ac76db76 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -33,7 +33,9 @@ #include <asm/hw_breakpoint.h> #include <asm/kdebug.h> #include <asm/traps.h> +#include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/sysreg.h> #include <asm/system_misc.h> #include <asm/uaccess.h> @@ -53,13 +55,17 @@ static int core_num_wrps; /* Determine number of BRP registers available. */ static int get_num_brps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_BRPS_SHIFT); } /* Determine number of WRP registers available. */ static int get_num_wrps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_WRPS_SHIFT); } int hw_breakpoint_slots(int type) @@ -312,9 +318,21 @@ static int get_hbp_len(u8 hbp_len) case ARM_BREAKPOINT_LEN_2: len_in_bytes = 2; break; + case ARM_BREAKPOINT_LEN_3: + len_in_bytes = 3; + break; case ARM_BREAKPOINT_LEN_4: len_in_bytes = 4; break; + case ARM_BREAKPOINT_LEN_5: + len_in_bytes = 5; + break; + case ARM_BREAKPOINT_LEN_6: + len_in_bytes = 6; + break; + case ARM_BREAKPOINT_LEN_7: + len_in_bytes = 7; + break; case ARM_BREAKPOINT_LEN_8: len_in_bytes = 8; break; @@ -344,7 +362,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) * to generic breakpoint descriptions. */ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type) + int *gen_len, int *gen_type, int *offset) { /* Type */ switch (ctrl.type) { @@ -364,17 +382,33 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } + if (!ctrl.len) + return -EINVAL; + *offset = __ffs(ctrl.len); + /* Len */ - switch (ctrl.len) { + switch (ctrl.len >> *offset) { case ARM_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; case ARM_BREAKPOINT_LEN_2: *gen_len = HW_BREAKPOINT_LEN_2; break; + case ARM_BREAKPOINT_LEN_3: + *gen_len = HW_BREAKPOINT_LEN_3; + break; case ARM_BREAKPOINT_LEN_4: *gen_len = HW_BREAKPOINT_LEN_4; break; + case ARM_BREAKPOINT_LEN_5: + *gen_len = HW_BREAKPOINT_LEN_5; + break; + case ARM_BREAKPOINT_LEN_6: + *gen_len = HW_BREAKPOINT_LEN_6; + break; + case ARM_BREAKPOINT_LEN_7: + *gen_len = HW_BREAKPOINT_LEN_7; + break; case ARM_BREAKPOINT_LEN_8: *gen_len = HW_BREAKPOINT_LEN_8; break; @@ -418,9 +452,21 @@ static int arch_build_bp_info(struct perf_event *bp) case HW_BREAKPOINT_LEN_2: info->ctrl.len = ARM_BREAKPOINT_LEN_2; break; + case HW_BREAKPOINT_LEN_3: + info->ctrl.len = ARM_BREAKPOINT_LEN_3; + break; case HW_BREAKPOINT_LEN_4: info->ctrl.len = ARM_BREAKPOINT_LEN_4; break; + case HW_BREAKPOINT_LEN_5: + info->ctrl.len = ARM_BREAKPOINT_LEN_5; + break; + case HW_BREAKPOINT_LEN_6: + info->ctrl.len = ARM_BREAKPOINT_LEN_6; + break; + case HW_BREAKPOINT_LEN_7: + info->ctrl.len = ARM_BREAKPOINT_LEN_7; + break; case HW_BREAKPOINT_LEN_8: info->ctrl.len = ARM_BREAKPOINT_LEN_8; break; @@ -512,18 +558,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) default: return -EINVAL; } - - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; } else { if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - if (info->address & alignment_mask) - return -EINVAL; + offset = info->address & alignment_mask; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. @@ -654,12 +699,47 @@ unlock: return 0; } +/* + * Arm64 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See Section D2.10.5 "Determining the memory location that caused a Watchpoint + * exception" of ARMv8 Architecture Reference Manual for details. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u64 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - int i, step = 0, *kernel_step, access; + int i, step = 0, *kernel_step, access, closest_match = 0; + u64 min_dist = -1, dist; u32 ctrl_reg; - u64 val, alignment_mask; + u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; struct arch_hw_breakpoint *info; @@ -668,35 +748,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, slots = this_cpu_ptr(wp_on_reg); debug_info = ¤t->thread.debug; + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; - - info = counter_arch_bp(wp); - /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ - if (is_compat_task()) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - } else { - alignment_mask = 0x7; - } - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(AARCH64_DBG_REG_WVR, i); - if (val != (untagged_addr(addr) & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select to confirm. */ - ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; + continue; /* * Check that the access type matches. @@ -705,18 +765,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; + /* Check if the watchpoint value and byte select match. */ + val = read_wb_reg(AARCH64_DBG_REG_WVR, i); + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + + info = counter_arch_bp(wp); info->trigger = addr; perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ if (!wp->overflow_handler) step = 1; + } + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + perf_bp_event(wp, regs); -unlock: - rcu_read_unlock(); + /* Do we need to handle the stepping? */ + if (!wp->overflow_handler) + step = 1; } + rcu_read_unlock(); if (!step) return 0; @@ -886,7 +969,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7e9327a0986d..dd9671cd0bb2 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -17,14 +17,19 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/mm.h> #include <linux/smp.h> +#include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/types.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> +#include <asm/fixmap.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -72,6 +77,39 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } +bool aarch64_insn_is_branch_imm(u32 insn) +{ + return (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) || + aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_bcond(insn)); +} + +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + page = virt_to_page(addr); + else + return addr; + + BUG_ON(!page); + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} /* * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always * little-endian. @@ -88,10 +126,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } +static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { insn = cpu_to_le32(insn); - return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); + return __aarch64_insn_write(addr, insn); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) @@ -218,23 +273,13 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) return aarch64_insn_patch_text_sync(addrs, insns, cnt); } -u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, - u32 insn, u64 imm) +static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, + u32 *maskp, int *shiftp) { - u32 immlo, immhi, lomask, himask, mask; + u32 mask; int shift; switch (type) { - case AARCH64_INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; case AARCH64_INSN_IMM_26: mask = BIT(26) - 1; shift = 0; @@ -273,9 +318,68 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, shift = 16; break; default: - pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", - type); - return 0; + return -EINVAL; + } + + *maskp = mask; + *shiftp = shift; + + return 0; +} + +#define ADR_IMM_HILOSPLIT 2 +#define ADR_IMM_SIZE SZ_2M +#define ADR_IMM_LOMASK ((1 << ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_HIMASK ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_LOSHIFT 29 +#define ADR_IMM_HISHIFT 5 + +u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn) +{ + u32 immlo, immhi, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + shift = 0; + immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK; + immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK; + insn = (immhi << ADR_IMM_HILOSPLIT) | immlo; + mask = ADR_IMM_SIZE - 1; + break; + default: + if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { + pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + } + + return (insn >> shift) & mask; +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + shift = 0; + immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT; + imm >>= ADR_IMM_HILOSPLIT; + immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT; + imm = immlo | immhi; + mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) | + (ADR_IMM_HIMASK << ADR_IMM_HISHIFT)); + break; + default: + if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } } /* Update the immediate field. */ @@ -961,6 +1065,58 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } +/* + * Decode the imm field of a branch, and return the byte offset as a + * signed value (so it can be used when computing a new branch + * target). + */ +s32 aarch64_get_branch_offset(u32 insn) +{ + s32 imm; + + if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { + imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn); + return (imm << 6) >> 4; + } + + if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_bcond(insn)) { + imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_19, insn); + return (imm << 13) >> 11; + } + + if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) { + imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_14, insn); + return (imm << 18) >> 16; + } + + /* Unhandled instruction */ + BUG(); +} + +/* + * Encode the displacement of a branch in the imm field and return the + * updated instruction. + */ +u32 aarch64_set_branch_offset(u32 insn, s32 offset) +{ + if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); + + if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_bcond(insn)) + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, + offset >> 2); + + if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_14, insn, + offset >> 2); + + /* Unhandled instruction */ + BUG(); +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 071a6ec13bd8..de99d4bd31bb 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,13 +33,13 @@ unsigned long irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) { -#ifdef CONFIG_SMP show_ipi_list(p, prec); -#endif seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } +void (*handle_arch_irq)(struct pt_regs *) = NULL; + void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 263a166291fb..4f1fec7a46db 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -22,9 +22,8 @@ #ifdef HAVE_JUMP_LABEL -static void __arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - bool is_static) +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)entry->code; u32 insn; @@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = aarch64_insn_gen_nop(); } - if (is_static) - aarch64_insn_patch_text_nosync(addr, insn); - else - aarch64_insn_patch_text(&addr, &insn, 1); -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - __arch_jump_label_transform(entry, type, false); + aarch64_insn_patch_text(&addr, &insn, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - __arch_jump_label_transform(entry, type, true); + /* + * We use the architected A64 NOP in arch_static_branch, so there's no + * need to patch an identical A64 NOP over the top of it here. The core + * will call arch_jump_label_transform from a module notifier if the + * NOP needs to be replaced by a branch. + */ } #endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 51128018b907..ca450c42b83a 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -27,6 +27,7 @@ #include <linux/moduleloader.h> #include <linux/vmalloc.h> #include <asm/insn.h> +#include <asm/sections.h> #define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX #define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 @@ -406,3 +407,20 @@ overflow: me->name, (int)ELF64_R_TYPE(rel[i].r_info), val); return -ENOEXEC; } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { + apply_alternatives((void *)s->sh_addr, s->sh_size); + return 0; + } + } + + return 0; +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 78a5894b1621..dfbd178f198e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1332,9 +1332,9 @@ static void __init cpu_pmu_init(struct arm_pmu *armpmu) static int __init init_hw_perf_events(void) { - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); - switch ((dfr >> 8) & 0xf) { + switch (cpuid_feature_extract_field(dfr, ID_AA64DFR0_PMUVER_SHIFT)) { case 0x1: /* PMUv3 */ cpu_pmu = armv8_pmuv3_pmu_init(); break; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index fde9923af859..365dc554594b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -44,6 +44,7 @@ #include <linux/personality.h> #include <linux/notifier.h> +#include <asm/alternative.h> #include <asm/compat.h> #include <asm/cacheflush.h> #include <asm/fpsimd.h> @@ -57,14 +58,6 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -void soft_restart(unsigned long addr) -{ - setup_mm_for_reboot(); - cpu_soft_restart(virt_to_phys(cpu_reset), addr); - /* Should never get here */ - BUG(); -} - /* * Function pointers to optional machine specific functions */ @@ -135,9 +128,7 @@ void machine_power_off(void) /* * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must + * while the primary CPU resets the system. Systems with multiple CPUs must * provide a HW restart implementation, to ensure that all CPUs reset at once. * This is required so that any code running after reset on the primary CPU * doesn't have to co-ordinate with other CPUs to ensure they aren't still @@ -163,6 +154,70 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { int i, top_reg; @@ -189,6 +244,8 @@ void __show_regs(struct pt_regs *regs) if (i % 2 == 0) printk("\n"); } + if (!user_mode(regs)) + show_extra_register_data(regs, 128); printk("\n"); } @@ -235,7 +292,8 @@ void release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_preserve_current_state(); + if (current->mm) + fpsimd_preserve_current_state(); *dst = *src; return 0; } @@ -278,6 +336,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; + if (IS_ENABLED(CONFIG_ARM64_UAO) && + cpus_have_cap(ARM64_HAS_UAO)) + childregs->pstate |= PSR_UAO_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } @@ -313,6 +374,17 @@ static void tls_thread_switch(struct task_struct *next) : : "r" (tpidr), "r" (tpidrro)); } +/* Restore the UAO state depending on next's addr_limit */ +static void uao_thread_switch(struct task_struct *next) +{ + if (IS_ENABLED(CONFIG_ARM64_UAO)) { + if (task_thread_info(next)->addr_limit == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); + } +} + /* * Thread switching. */ @@ -325,6 +397,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + uao_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 81c081eaca42..7a57cf5c3441 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -396,8 +396,6 @@ int __init psci_init(void) return init_fn(np); } -#ifdef CONFIG_SMP - static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) { return 0; @@ -476,7 +474,6 @@ static int cpu_psci_cpu_kill(unsigned int cpu) return 0; } #endif -#endif static int psci_suspend_finisher(unsigned long index) { @@ -511,7 +508,6 @@ const struct cpu_operations cpu_psci_ops = { .cpu_init_idle = cpu_psci_cpu_init_idle, .cpu_suspend = cpu_psci_cpu_suspend, #endif -#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -520,6 +516,5 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif -#endif }; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8a4ae8e73213..95fcbd53802d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/seccomp.h> #include <linux/security.h> #include <linux/init.h> #include <linux/signal.h> @@ -219,13 +220,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, disabled = !ctrl.enabled; + int err, len, type, offset, disabled = !ctrl.enabled; attr->disabled = disabled; if (disabled) return 0; - err = arch_bp_generic_fields(ctrl, &len, &type); + err = arch_bp_generic_fields(ctrl, &len, &type, &offset); if (err) return err; @@ -244,6 +245,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, attr->bp_len = len; attr->bp_type = type; + attr->bp_addr += offset; return 0; } @@ -296,7 +298,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type, if (IS_ERR(bp)) return PTR_ERR(bp); - *addr = bp ? bp->attr.bp_addr : 0; + *addr = bp ? counter_arch_bp(bp)->address : 0; return 0; } @@ -551,6 +553,32 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, return ret; } +static int system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int syscallno = task_pt_regs(target)->syscallno; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &syscallno, 0, -1); +} + +static int system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int syscallno, ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); + if (ret) + return ret; + + task_pt_regs(target)->syscallno = syscallno; + return ret; +} + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -559,6 +587,7 @@ enum aarch64_regset { REGSET_HW_BREAK, REGSET_HW_WATCH, #endif + REGSET_SYSTEM_CALL, }; static const struct user_regset aarch64_regsets[] = { @@ -608,6 +637,14 @@ static const struct user_regset aarch64_regsets[] = { .set = hw_break_set, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_ARM_SYSTEM_CALL, + .n = 1, + .size = sizeof(int), + .align = sizeof(int), + .get = system_call_get, + .set = system_call_set, + }, }; static const struct user_regset_view user_aarch64_view = { @@ -1114,6 +1151,10 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing() == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3843e546ab3d..c31d8f706cb5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -27,7 +27,6 @@ #include <linux/console.h> #include <linux/cache.h> #include <linux/bootmem.h> -#include <linux/seq_file.h> #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> @@ -43,13 +42,11 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/efi.h> -#include <linux/personality.h> #include <asm/fixmap.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/elf.h> -#include <asm/cputable.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/kasan.h> @@ -63,27 +60,6 @@ #include <asm/psci.h> #include <asm/efi.h> -unsigned int processor_id; -EXPORT_SYMBOL(processor_id); - -unsigned long elf_hwcap __read_mostly; -EXPORT_SYMBOL_GPL(elf_hwcap); - -#ifdef CONFIG_COMPAT -#define COMPAT_ELF_HWCAP_DEFAULT \ - (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ - COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ - COMPAT_HWCAP_LPAE) -unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; -unsigned int compat_elf_hwcap2 __read_mostly; -#endif - -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); - -static const char *cpu_name; phys_addr_t __fdt_pointer __initdata; /* @@ -119,6 +95,11 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +/* + * The recorded values of x0 .. x3 upon kernel entry. + */ +u64 __cacheline_aligned boot_args[4]; + void __init smp_setup_processor_id(void) { /* @@ -135,7 +116,6 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } struct mpidr_hash mpidr_hash; -#ifdef CONFIG_SMP /** * smp_build_mpidr_hash - Pre-compute shifts required at each affinity * level in order to build a linear index from an @@ -201,107 +181,6 @@ static void __init smp_build_mpidr_hash(void) pr_warn("Large number of MPIDR hash buckets detected\n"); __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -#endif - -static void __init setup_processor(void) -{ - struct cpu_info *cpu_info; - u64 features, block; - u32 cwg; - int cls; - - cpu_info = lookup_processor_type(read_cpuid_id()); - if (!cpu_info) { - printk("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); - } - - cpu_name = cpu_info->cpu_name; - - printk("CPU: %s [%08x] revision %d\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - - sprintf(init_utsname()->machine, ELF_PLATFORM); - elf_hwcap = 0; - - cpuinfo_store_boot_cpu(); - - /* - * Check for sane CTR_EL0.CWG value. - */ - cwg = cache_type_cwg(); - cls = cache_line_size(); - if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); - - /* - * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. - * The blocks we test below represent incremental functionality - * for non-negative values. Negative values are reserved. - */ - features = read_cpuid(ID_AA64ISAR0_EL1); - block = (features >> 4) & 0xf; - if (!(block & 0x8)) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_PMULL; - case 1: - elf_hwcap |= HWCAP_AES; - case 0: - break; - } - } - - block = (features >> 8) & 0xf; - if (block && !(block & 0x8)) - elf_hwcap |= HWCAP_SHA1; - - block = (features >> 12) & 0xf; - if (block && !(block & 0x8)) - elf_hwcap |= HWCAP_SHA2; - - block = (features >> 16) & 0xf; - if (block && !(block & 0x8)) - elf_hwcap |= HWCAP_CRC32; - -#ifdef CONFIG_COMPAT - /* - * ID_ISAR5_EL1 carries similar information as above, but pertaining to - * the Aarch32 32-bit execution state. - */ - features = read_cpuid(ID_ISAR5_EL1); - block = (features >> 4) & 0xf; - if (!(block & 0x8)) { - switch (block) { - default: - case 2: - compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; - case 1: - compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; - case 0: - break; - } - } - - block = (features >> 8) & 0xf; - if (block && !(block & 0x8)) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - - block = (features >> 12) & 0xf; - if (block && !(block & 0x8)) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; - - block = (features >> 16) & 0xf; - if (block && !(block & 0x8)) - compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; -#endif -} static void __init setup_machine_fdt(phys_addr_t dt_phys) { @@ -315,6 +194,8 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) while (true) cpu_relax(); } + + dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); } static void __init request_standard_resources(void) @@ -323,7 +204,7 @@ static void __init request_standard_resources(void) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); @@ -412,10 +293,11 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) { - setup_processor(); + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); setup_machine_fdt(__fdt_pointer); + sprintf(init_utsname()->machine, ELF_PLATFORM); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; @@ -423,6 +305,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_fixmap_init(); early_ioremap_init(); parse_early_param(); @@ -443,7 +326,6 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(); - efi_idmap_init(); early_ioremap_reset(); unflatten_device_tree(); @@ -452,9 +334,16 @@ void __init setup_arch(char **cmdline_p) cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; cpu_read_bootcpu_ops(); -#ifdef CONFIG_SMP smp_init_cpus(); smp_build_mpidr_hash(); + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Make sure init_thread_info.ttbr0 always generates translation + * faults in case uaccess_enable() is inadvertently called by the init + * thread. + */ + init_thread_info.ttbr0 = virt_to_phys(empty_zero_page); #endif #ifdef CONFIG_VT @@ -464,6 +353,12 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + if (boot_args[1] || boot_args[2] || boot_args[3]) { + pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" + "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" + "This indicates a broken bootloader or old kernel\n", + boot_args[1], boot_args[2], boot_args[3]); + } } static int __init arm64_device_init(void) @@ -486,130 +381,3 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); - -static const char *hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL -}; - -#ifdef CONFIG_COMPAT -static const char *compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm", - NULL -}; - -static const char *compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL -}; -#endif /* CONFIG_COMPAT */ - -static int c_show(struct seq_file *m, void *v) -{ - int i, j; - - for_each_online_cpu(i) { - struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); - u32 midr = cpuinfo->reg_midr; - - /* - * glibc reads /proc/cpuinfo to determine the number of - * online processors, looking for lines beginning with - * "processor". Give glibc what it expects. - */ -#ifdef CONFIG_SMP - seq_printf(m, "processor\t: %d\n", i); -#endif - - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", - loops_per_jiffy / (500000UL/HZ), - loops_per_jiffy / (5000UL/HZ) % 100); - - /* - * Dump out the common processor features in a single line. - * Userspace should read the hwcaps with getauxval(AT_HWCAP) - * rather than attempting to parse this, but there's a body of - * software which does already (at least for 32-bit). - */ - seq_puts(m, "Features\t:"); - if (personality(current->personality) == PER_LINUX32) { -#ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) - seq_printf(m, " %s", compat_hwcap_str[j]); - - for (j = 0; compat_hwcap2_str[j]; j++) - if (compat_elf_hwcap2 & (1 << j)) - seq_printf(m, " %s", compat_hwcap2_str[j]); -#endif /* CONFIG_COMPAT */ - } else { - for (j = 0; hwcap_str[j]; j++) - if (elf_hwcap & (1 << j)) - seq_printf(m, " %s", hwcap_str[j]); - } - seq_puts(m, "\n"); - - seq_printf(m, "CPU implementer\t: 0x%02x\n", - MIDR_IMPLEMENTOR(midr)); - seq_printf(m, "CPU architecture: 8\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); - seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); - seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); - } - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < 1 ? (void *)1 : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return NULL; -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = c_show -}; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 6fa792137eda..660ccf9f7524 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -131,7 +131,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 128-bit boundary, then 'sp' should diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b6da20fa7a48..c58aee062590 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -186,6 +186,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); @@ -364,7 +370,7 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) struct compat_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, @@ -398,7 +404,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) struct compat_rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, @@ -521,7 +527,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ - __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << + __put_user_error(!!(current->thread.fault_code & ESR_ELx_WNR) << FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ede186cdd452..3576b1590ebd 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -82,7 +82,6 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #CPU_CTX_SP] ldr x1, =sleep_save_sp ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] -#ifdef CONFIG_SMP mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -94,7 +93,6 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 -#endif bl __cpu_suspend_save /* * Grab suspend finisher in x20 and its argument in x19 @@ -134,6 +132,14 @@ ENTRY(cpu_resume_mmu) ldr x3, =cpu_resume_after_mmu msr sctlr_el1, x0 // restore sctlr_el1 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x3 // global jump to virtual address ENDPROC(cpu_resume_mmu) cpu_resume_after_mmu: @@ -149,7 +155,6 @@ ENDPROC(cpu_resume_after_mmu) ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly -#ifdef CONFIG_SMP mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -159,9 +164,6 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ -#else - mov x7, xzr -#endif adrp x0, sleep_save_sp add x0, x0, #:lo12:sleep_save_sp ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] @@ -172,6 +174,9 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ ldr x1, [x1, #:lo12:sleep_idmap_phys] mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a1f054549cb6..0d4cf1fbc680 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -141,21 +141,27 @@ asmlinkage void secondary_start_kernel(void) */ atomic_inc(&mm->mm_count); current->active_mm = mm; - cpumask_set_cpu(cpu, mm_cpumask(mm)); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - printk("CPU%u: Booted secondary processor\n", cpu); /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); preempt_disable(); trace_hardirqs_off(); + /* + * If the system has established the capabilities, make sure + * this CPU ticks all of those. If it doesn't, the CPU will + * fail to come online. + */ + verify_local_cpu_capabilities(); + if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); @@ -176,6 +182,8 @@ asmlinkage void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ + pr_info("CPU%u: Booted secondary processor [%08x]\n", + cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); @@ -230,12 +238,6 @@ int __cpu_disable(void) * OK - migrate IRQs away from this CPU */ migrate_irqs(); - - /* - * Remove this CPU from the vm mask set of all processes. - */ - clear_tasks_mm_cpumask(cpu); - return 0; } @@ -309,11 +311,13 @@ void cpu_die(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); - apply_alternatives(); + setup_cpu_features(); + apply_alternatives_all(); } void __init smp_prepare_boot_cpu(void) { + cpuinfo_store_boot_cpu(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 23e35de789f7..ab14fee1f255 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -119,7 +119,7 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) else cpu_switch_mm(mm->pgd, mm); - flush_tlb_all(); + local_flush_tlb_all(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 1a7125c3099b..59630e4748f7 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -41,7 +41,6 @@ #include <asm/thread_info.h> #include <asm/stacktrace.h> -#ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { struct stackframe frame; @@ -61,7 +60,6 @@ unsigned long profile_pc(struct pt_regs *regs) return frame.pc; } EXPORT_SYMBOL(profile_pc); -#endif void __init time_init(void) { diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b6ee26b0939a..2f98601318c3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -19,10 +19,30 @@ #include <linux/nodemask.h> #include <linux/of.h> #include <linux/sched.h> +#include <linux/sched.h> +#include <linux/sched_energy.h> #include <asm/cputype.h> #include <asm/topology.h> +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + +unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) +{ +#ifdef CONFIG_CPU_FREQ + unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); + + return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; +#else + return per_cpu(cpu_scale, cpu); +#endif +} + +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + static int __init get_cpu_for_node(struct device_node *node) { struct device_node *cpu_node; @@ -206,11 +226,67 @@ out: struct cpu_topology cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); +/* sd energy functions */ +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu); + return NULL; + } + + return sge; +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for CPU%d\n", cpu); + return NULL; + } + + return sge; +} + const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_sibling; } +static inline int cpu_corepower_flags(void) +{ + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES; +} + +static struct sched_domain_topology_level arm64_topology[] = { +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static void update_cpu_capacity(unsigned int cpu) +{ + unsigned long capacity = SCHED_CAPACITY_SCALE; + + if (cpu_core_energy(cpu)) { + int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; + capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; + } + + set_capacity_scale(cpu, capacity); + + pr_info("CPU%d: update cpu_capacity %lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); +} + static void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; @@ -269,6 +345,7 @@ void store_cpu_topology(unsigned int cpuid) topology_populated: update_siblings_masks(cpuid); + update_cpu_capacity(cpuid); } static void __init reset_cpu_topology(void) @@ -299,4 +376,8 @@ void __init init_cpu_topology(void) */ if (parse_dt_topology()) reset_cpu_topology(); + else + set_sched_topology(arm64_topology); + + init_sched_energy_costs(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4c104235ce01..b0b9ea7aed4f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include <asm/atomic.h> #include <asm/debug-monitors.h> +#include <asm/esr.h> #include <asm/traps.h> #include <asm/stacktrace.h> #include <asm/exception.h> @@ -59,8 +60,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -97,21 +97,12 @@ static void dump_backtrace_entry(unsigned long where, unsigned long stack) stack + sizeof(struct pt_regs)); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -125,8 +116,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -178,11 +179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #else #define S_PREEMPT "" #endif -#ifdef CONFIG_SMP #define S_SMP " SMP" -#else -#define S_SMP "" -#endif static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) @@ -373,6 +370,51 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) return sys_ni_syscall(); } +static const char *esr_class_str[] = { + [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", + [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", + [ESR_ELx_EC_WFx] = "WFI/WFE", + [ESR_ELx_EC_CP15_32] = "CP15 MCR/MRC", + [ESR_ELx_EC_CP15_64] = "CP15 MCRR/MRRC", + [ESR_ELx_EC_CP14_MR] = "CP14 MCR/MRC", + [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC", + [ESR_ELx_EC_FP_ASIMD] = "ASIMD", + [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", + [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", + [ESR_ELx_EC_ILL] = "PSTATE.IL", + [ESR_ELx_EC_SVC32] = "SVC (AArch32)", + [ESR_ELx_EC_HVC32] = "HVC (AArch32)", + [ESR_ELx_EC_SMC32] = "SMC (AArch32)", + [ESR_ELx_EC_SVC64] = "SVC (AArch64)", + [ESR_ELx_EC_HVC64] = "HVC (AArch64)", + [ESR_ELx_EC_SMC64] = "SMC (AArch64)", + [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", + [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", + [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", + [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", + [ESR_ELx_EC_PC_ALIGN] = "PC Alignment", + [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", + [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", + [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", + [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", + [ESR_ELx_EC_SERROR] = "SError", + [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)", + [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)", + [ESR_ELx_EC_SOFTSTP_LOW] = "Software Step (lower EL)", + [ESR_ELx_EC_SOFTSTP_CUR] = "Software Step (current EL)", + [ESR_ELx_EC_WATCHPT_LOW] = "Watchpoint (lower EL)", + [ESR_ELx_EC_WATCHPT_CUR] = "Watchpoint (current EL)", + [ESR_ELx_EC_BKPT32] = "BKPT (AArch32)", + [ESR_ELx_EC_VECTOR32] = "Vector catch (AArch32)", + [ESR_ELx_EC_BRK64] = "BRK (AArch64)", +}; + +const char *esr_get_class_string(u32 esr) +{ + return esr_class_str[ESR_ELx_EC(esr)]; +} + /* * bad_mode handles the impossible case in the exception vector. This is always * fatal. @@ -381,8 +423,8 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { console_verbose(); - pr_crit("Bad mode in %s handler detected, code 0x%08x\n", - handler[reason], esr); + pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n", + handler[reason], esr, esr_get_class_string(esr)); die("Oops - bad mode", regs, 0); local_irq_disable(); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 32aeea083d93..71766af43b70 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -55,7 +55,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static struct page *vectors_page[1]; -static int alloc_vectors_page(void) +static int __init alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; @@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { + static const struct vm_special_mapping spec = { .name = "[vectors]", .pages = vectors_page, @@ -200,7 +200,7 @@ up_fail: void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -213,11 +213,17 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->tkr.cycle_last; + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; - vdso_data->cs_mult = tk->tkr.mult; - vdso_data->cs_shift = tk->tkr.shift; + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ + vdso_data->cs_shift = tk->tkr_mono.shift; } smp_wmb(); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..62c84f7cb01b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index fe652ffd34c2..1f8bba27e2f3 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,109 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. + */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. */ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +136,25 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. */ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. */ mov x13, #1000 @@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. */ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f + cmp w0, #JUMPSLOT_MAX + b.hi syscall + adr vdso_data, _vdso_data + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - mov x2, x30 - .cfi_register x30, x2 + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get kernel timespec. */ - adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - bl __do_get_tspec - seqcnt_check w9, 1b + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - mov x30, x2 + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get coarse timespec. */ - adr vdso_data, _vdso_data -3: seqcnt_acquire + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 + + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 + + ALIGN +realtime_coarse: + seqcnt_acquire ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - /* Get wtm timespec. */ + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 - - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 - -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -178,6 +301,7 @@ ENTRY(__kernel_clock_getres) cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f @@ -204,46 +328,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). - * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Read the virtual counter. */ - isb - mrs x15, cntvct_el0 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. */ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index 60c1db54b41a..82379a70ef03 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -21,9 +21,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .rodata .balign PAGE_SIZE vdso_start: .incbin "arch/arm64/kernel/vdso/vdso.so" diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2f600294e8ca..724ed30e4992 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,9 +5,11 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/kernel-pgtable.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#include <asm/pgtable.h> #include "image.h" @@ -32,6 +34,30 @@ jiffies = jiffies_64; *(.hyp.text) \ VMLINUX_SYMBOL(__hyp_text_end) = .; +/* + * The size of the PE/COFF section that covers the kernel image, which + * runs from stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned + * boundary should be sufficient. + */ +PECOFF_FILE_ALIGNMENT = 0x200; + +#ifdef CONFIG_EFI +#define PECOFF_EDATA_PADDING \ + .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } +#else +#define PECOFF_EDATA_PADDING +#endif + +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); +#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +#else +#define ALIGN_DEBUG_RO +#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +#endif + SECTIONS { /* @@ -54,6 +80,7 @@ SECTIONS _text = .; HEAD_TEXT } + ALIGN_DEBUG_RO .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -70,19 +97,23 @@ SECTIONS *(.got) /* Global offset table */ } + ALIGN_DEBUG_RO + _etext = .; /* End of text section */ + RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES - _etext = .; /* End of text and rodata section */ + ALIGN_DEBUG_RO - . = ALIGN(PAGE_SIZE); + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) __init_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } - . = ALIGN(16); + + ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -114,6 +145,7 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + PECOFF_EDATA_PADDING _edata = .; BSS_SECTION(0, 0, 0) @@ -124,6 +156,11 @@ SECTIONS swapper_pg_dir = .; . += SWAPPER_DIR_SIZE; +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + _end = .; STABS_DEBUG diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 0b4326578985..6dbc6ace341f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -52,7 +52,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_cpuid(ID_AA64PFR0_EL1); + pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3d7c2df89946..f4001cb14488 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -69,68 +69,31 @@ static u32 get_ccsidr(u32 csselr) return ccsidr; } -static void do_dc_cisw(u32 val) -{ - asm volatile("dc cisw, %x0" : : "r" (val)); - dsb(ish); -} - -static void do_dc_csw(u32 val) -{ - asm volatile("dc csw, %x0" : : "r" (val)); - dsb(ish); -} - -/* See note at ARM ARM B1.14.4 */ +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct sys_reg_params *p, const struct sys_reg_desc *r) { - unsigned long val; - int cpu; - if (!p->is_write) return read_from_write_only(vcpu, p); - cpu = get_cpu(); - - cpumask_setall(&vcpu->arch.require_dcache_flush); - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); - - /* If we were already preempted, take the long way around */ - if (cpu != vcpu->arch.last_pcpu) { - flush_cache_all(); - goto done; - } - - val = *vcpu_reg(vcpu, p->Rt); - - switch (p->CRm) { - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ - case 14: /* DCCISW */ - do_dc_cisw(val); - break; - - case 10: /* DCCSW */ - do_dc_csw(val); - break; - } - -done: - put_cpu(); - + kvm_set_way_flush(vcpu); return true; } /* * Generic accessor for VM registers. Only called as long as HCR_TVM - * is set. + * is set. If the guest enables the MMU, we stop trapping the VM + * sys_regs and leave it in complete control of the caches. */ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *p, const struct sys_reg_desc *r) { unsigned long val; + bool was_enabled = vcpu_has_cache_enabled(vcpu); BUG_ON(!p->is_write); @@ -143,25 +106,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; } - return true; -} - -/* - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the - * guest enables the MMU, we stop trapping the VM sys_regs and leave - * it in complete control of the caches. - */ -static bool access_sctlr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - access_vm_reg(vcpu, p, r); - - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ - vcpu->arch.hcr_el2 &= ~HCR_TVM; - stage2_flush_vm(vcpu->kvm); - } - + kvm_toggle_cache(vcpu, was_enabled); return true; } @@ -377,7 +322,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_mpidr, MPIDR_EL1 }, /* SCTLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, /* CPACR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), NULL, reset_val, CPACR_EL1, 0 }, @@ -509,13 +454,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - u32 el3 = !!((pfr >> 12) & 0xf); + u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); - *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | - (((dfr >> 12) & 0xf) << 24) | - (((dfr >> 28) & 0xf) << 20) | + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) | (6 << 16) | (el3 << 14) | (el3 << 12)); return true; } @@ -657,7 +602,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a9723c71c52b..d7150e30438a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,10 +17,7 @@ */ #include <linux/linkage.h> -#include <asm/alternative.h> -#include <asm/assembler.h> -#include <asm/cpufeature.h> -#include <asm/sysreg.h> +#include <asm/uaccess.h> .text @@ -33,29 +30,27 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x2, x3 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f 1: -USER(9f, str xzr, [x0], #8 ) +uao_user_alternative 9f, str, sttr, xzr, x0, 8 subs x1, x1, #8 b.pl 1b 2: adds x1, x1, #4 b.mi 3f -USER(9f, str wzr, [x0], #4 ) +uao_user_alternative 9f, str, sttr, wzr, x0, 4 sub x1, x1, #4 3: adds x1, x1, #2 b.mi 4f -USER(9f, strh wzr, [x0], #2 ) +uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f -USER(9f, strb wzr, [x0] ) +uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x2 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 1be9ef27be97..90154f3f7f2a 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -16,10 +16,8 @@ #include <linux/linkage.h> -#include <asm/alternative.h> -#include <asm/assembler.h> -#include <asm/cpufeature.h> -#include <asm/sysreg.h> +#include <asm/cache.h> +#include <asm/uaccess.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -31,49 +29,56 @@ * Returns: * x0 - bytes not copied */ -ENTRY(__copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) - add x5, x1, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 - stp x3, x4, [x0], #16 - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 - str x3, [x0], #8 -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 - str w3, [x0], #4 -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 - strh w3, [x0], #2 -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) - strb w3, [x0] -5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) + + .macro ldrb1 ptr, regB, val + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .endm + + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm + + .macro ldrh1 ptr, regB, val + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .endm + + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm + + .macro ldr1 ptr, regB, val + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .endm + + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm + + .macro ldp1 ptr, regB, regC, val + uao_ldp 9998f, \ptr, \regB, \regC, \val + .endm + + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm + +end .req x5 +ENTRY(__arch_copy_from_user) + uaccess_enable_not_uao x3, x4 + add end, x0, x2 +#include "copy_template.S" + uaccess_disable_not_uao x3 + mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 -9: sub x2, x5, x1 - mov x3, x2 -10: strb wzr, [x0], #1 // zero remaining buffer space - subs x3, x3, #1 - b.ne 10b - mov x0, x2 // bytes not copied +9998: + sub x0, end, dst +9999: + strb wzr, [dst], #1 // zero remaining buffer space + cmp dst, end + b.lo 9999b ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 1b94661e22b3..718b1c4e2f85 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -18,10 +18,8 @@ #include <linux/linkage.h> -#include <asm/alternative.h> -#include <asm/assembler.h> -#include <asm/cpufeature.h> -#include <asm/sysreg.h> +#include <asm/cache.h> +#include <asm/uaccess.h> /* * Copy from user space to user space (alignment handled by the hardware) @@ -33,44 +31,50 @@ * Returns: * x0 - bytes not copied */ + .macro ldrb1 ptr, regB, val + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .endm + + .macro strb1 ptr, regB, val + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val + .endm + + .macro ldrh1 ptr, regB, val + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .endm + + .macro strh1 ptr, regB, val + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .endm + + .macro ldr1 ptr, regB, val + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .endm + + .macro str1 ptr, regB, val + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .endm + + .macro ldp1 ptr, regB, regC, val + uao_ldp 9998f, \ptr, \regB, \regC, \val + .endm + + .macro stp1 ptr, regB, regC, val + uao_stp 9998f, \ptr, \regB, \regC, \val + .endm + +end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) -USER(9f, strb w3, [x0] ) -5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 + add end, x0, x2 +#include "copy_template.S" + uaccess_disable_not_uao x3 + mov x0, #0 ret ENDPROC(__copy_in_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index a257b47e2dc4..e99e31c9acac 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -16,10 +16,8 @@ #include <linux/linkage.h> -#include <asm/alternative.h> -#include <asm/assembler.h> -#include <asm/cpufeature.h> -#include <asm/sysreg.h> +#include <asm/cache.h> +#include <asm/uaccess.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -31,44 +29,50 @@ * Returns: * x0 - bytes not copied */ -ENTRY(__copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: - ldp x3, x4, [x1], #16 - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f - ldr x3, [x1], #8 - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f - ldr w3, [x1], #4 - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f - ldrh w3, [x1], #2 - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f - ldrb w3, [x1] -USER(9f, strb w3, [x0] ) -5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm + + .macro strb1 ptr, regB, val + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val + .endm + + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm + + .macro strh1 ptr, regB, val + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .endm + + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm + + .macro str1 ptr, regB, val + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .endm + + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm + + .macro stp1 ptr, regB, regC, val + uao_stp 9998f, \ptr, \regB, \regC, \val + .endm + +end .req x5 +ENTRY(__arch_copy_to_user) + uaccess_enable_not_uao x3, x4 + add end, x0, x2 +#include "copy_template.S" + uaccess_disable_not_uao x3 + mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 00bc265f87e9..57f57fde5722 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,6 +3,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM64_PTDUMP) += dump.o obj-$(CONFIG_KASAN) += kasan_init.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 321a6ac84a94..1c10c80ff2d1 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -22,81 +22,7 @@ #include <asm/assembler.h> #include <asm/cpufeature.h> #include <asm/alternative.h> - -#include "proc-macros.S" - -/* - * __flush_dcache_all() - * - * Flush the whole D-cache. - * - * Corrupted registers: x0-x7, x9-x11 - */ -__flush_dcache_all: - dmb sy // ensure ordering with previous memory accesses - mrs x0, clidr_el1 // read clidr - and x3, x0, #0x7000000 // extract loc from clidr - lsr x3, x3, #23 // left align loc bit field - cbz x3, finished // if loc is 0, then no need to clean - mov x10, #0 // start clean at cache level 0 -loop1: - add x2, x10, x10, lsr #1 // work out 3x current cache level - lsr x1, x0, x2 // extract cache type bits from clidr - and x1, x1, #7 // mask of the bits for current cache only - cmp x1, #2 // see what cache we have at this level - b.lt skip // skip if no cache, or just i-cache - save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic - msr csselr_el1, x10 // select current cache level in csselr - isb // isb to sych the new cssr&csidr - mrs x1, ccsidr_el1 // read the new ccsidr - restore_irqs x9 - and x2, x1, #7 // extract the length of the cache lines - add x2, x2, #4 // add 4 (line length offset) - mov x4, #0x3ff - and x4, x4, x1, lsr #3 // find maximum number on the way size - clz w5, w4 // find bit position of way size increment - mov x7, #0x7fff - and x7, x7, x1, lsr #13 // extract max number of the index size -loop2: - mov x9, x4 // create working copy of max way size -loop3: - lsl x6, x9, x5 - orr x11, x10, x6 // factor way and cache number into x11 - lsl x6, x7, x2 - orr x11, x11, x6 // factor index number into x11 - dc cisw, x11 // clean & invalidate by set/way - subs x9, x9, #1 // decrement the way - b.ge loop3 - subs x7, x7, #1 // decrement the index - b.ge loop2 -skip: - add x10, x10, #2 // increment cache number - cmp x3, x10 - b.gt loop1 -finished: - mov x10, #0 // swith back to cache level 0 - msr csselr_el1, x10 // select current cache level in csselr - dsb sy - isb - ret -ENDPROC(__flush_dcache_all) - -/* - * flush_cache_all() - * - * Flush the entire cache system. The data cache flush is now achieved - * using atomic clean / invalidates working outwards from L1 cache. This - * is done using Set/Way based cache maintainance instructions. The - * instruction cache can still be invalidated back to the point of - * unification in a single instruction. - */ -ENTRY(flush_cache_all) - mov x12, lr - bl __flush_dcache_all - mov x0, #0 - ic ialluis // I+BTB cache invalidate - ret x12 -ENDPROC(flush_cache_all) +#include <asm/uaccess.h> /* * flush_icache_range(start,end) @@ -122,6 +48,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) + uaccess_ttbr0_enable x2, x3 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 @@ -143,6 +70,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU 9: // ignore any faulting cache operation dsb ish isb + uaccess_ttbr0_disable x1 ret ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) @@ -212,7 +140,12 @@ __dma_clean_range: dcache_line_size x2, x3 sub x3, x2, #1 bic x0, x0, x3 -1: alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE +1: +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc cvac, x0 +alternative_else + dc civac, x0 +alternative_endif add x0, x0, x2 cmp x0, x1 b.lo 1b diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 76c1e6cd36fc..a1877045141a 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,151 +17,190 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/init.h> +#include <linux/bitops.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/percpu.h> +#include <asm/cpufeature.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include <asm/cachetype.h> - -#define asid_bits(reg) \ - (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) - -#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) +static u32 asid_bits; static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; -/* - * We fork()ed a process, and we need a new context for the child to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} +static atomic64_t asid_generation; +static unsigned long *asid_map; -static void flush_context(void) -{ - /* set the reserved TTBR0 before flushing the TLB */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - if (icache_is_aivivt()) - __flush_icache_all(); -} +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -#ifdef CONFIG_SMP +#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) +#define ASID_FIRST_VERSION (1UL << asid_bits) +#define NUM_USER_ASIDS ASID_FIRST_VERSION -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static void flush_context(unsigned int cpu) { - unsigned long flags; + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); /* - * Locking needed for multi-threaded applications where the same - * mm->context.id could be set from different CPUs during the - * broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. + * Ensure the generation bump is observed before we xchg the + * active_asids. */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + smp_wmb(); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* - * Old version of ASID found. Set the new one and reset - * mm_cpumask(mm). + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_aivivt()) + __flush_icache_all(); } -/* - * Reset the ASID on the current CPU. This function call is broadcast from the - * CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +static int is_reserved_asid(u64 asid) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0) { + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (is_reserved_asid(asid)) + return generation | (asid & ~ASID_MASK); + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + goto bump_gen; + } /* - * current->active_mm could be init_mm for the idle thread immediately - * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to - * the reserved value, so no need to reset any context. + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we use ASID #0 when setting a + * reserved TTBR0 for the init_mm. */ - if (mm == &init_mm) - return; + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid != NUM_USER_ASIDS) + goto set_asid; - smp_rmb(); - asid = cpu_last_asid + cpu; + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); - flush_context(); - set_mm_context(mm, asid); + /* We have at least 1 ASID per CPU, so this will always succeed */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); -} +set_asid: + __set_bit(asid, asid_map); + cur_idx = asid; -#else - -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); +bump_gen: + asid |= generation; + return asid; } -#endif - -void __new_context(struct mm_struct *mm) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int bits = asid_bits(); + unsigned long flags; + u64 asid; + + asid = atomic64_read(&mm->context.id); - raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP /* - * Check the ASID again, in case the change was broadcast from another - * CPU before we acquired the lock. + * The memory ordering here is subtle. We rely on the control + * dependency between the generation read and the update of + * active_asids to ensure that we are synchronised with a + * parallel rollover (i.e. this pairs with the smp_wmb() in + * flush_context). */ - if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; + if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) + && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); } -#endif - /* - * At this point, it is guaranteed that the current mm (with an old - * ASID) isn't active on any other CPU since the ASIDs are changed - * simultaneously via IPI. - */ - asid = ++cpu_last_asid; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + + atomic64_set(&per_cpu(active_asids, cpu), asid); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: /* - * If we've used up all our ASIDs, we need to start a new version and - * flush the TLB. + * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when + * emulating PAN. */ - if (unlikely((asid & ((1 << bits) - 1)) == 0)) { - /* increment the ASID version */ - cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); - if (cpu_last_asid == 0) - cpu_last_asid = ASID_FIRST_VERSION; - asid = cpu_last_asid + smp_processor_id(); - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS - 1; + if (!system_uses_ttbr0_pan()) + cpu_switch_mm(mm->pgd, mm); +} + +static int asids_init(void) +{ + int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4); + + switch (fld) { + default: + pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); + /* Fallthrough */ + case 0: + asid_bits = 8; + break; + case 2: + asid_bits = 16; } - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + /* If we end up with more CPUs than ASIDs, expect things to crash */ + WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), + GFP_KERNEL); + if (!asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + NUM_USER_ASIDS); + + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); + return 0; } +early_initcall(asids_init); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6efbb52cb92e..97f741cd34e7 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -170,7 +170,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false), - NULL); + __builtin_return_address(0)); if (!coherent_ptr) goto no_map; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c new file mode 100644 index 000000000000..bf69601be546 --- /dev/null +++ b/arch/arm64/mm/dump.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + VMALLOC_START_NR = 0, + VMALLOC_END_NR, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + VMEMMAP_START_NR, + VMEMMAP_END_NR, +#endif + PCI_START_NR, + PCI_END_NR, + FIXADDR_START_NR, + FIXADDR_END_NR, + MODULES_START_NR, + MODUELS_END_NR, + KERNEL_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + { VMALLOC_START, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { 0, "vmemmap start" }, + { 0, "vmemmap end" }, +#endif + { (unsigned long) PCI_IOBASE, "PCI I/O start" }, + { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, + u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < LOWEST_ADDR) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%16lx-0x%16lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + seq_puts(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud)) + note_page(st, addr, 2, pud_val(*pud)); + else + walk_pmd(st, pud, addr); + } +} + +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +{ + pgd_t *pgd = pgd_offset(mm, 0); + unsigned i; + unsigned long addr; + + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = start + i * PGDIR_SIZE; + if (pgd_none(*pgd) || pgd_bad(*pgd)) + note_page(st, addr, 1, pgd_val(*pgd)); + else + walk_pud(st, pgd, addr); + } +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .marker = address_markers, + }; + + walk_pgd(&st, &init_mm, LOWEST_ADDR); + + note_page(&st, 0, 0, 0); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[VMEMMAP_START_NR].start_address = + (unsigned long)virt_to_page(PAGE_OFFSET); + address_markers[VMEMMAP_END_NR].start_address = + (unsigned long)virt_to_page(high_memory); + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 7788e084fddb..3ea32524f199 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -81,6 +81,11 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk("\n"); } +static bool is_el1_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; +} + /* * The kernel tried to access some page that wasn't present. */ @@ -89,8 +94,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, { /* * Are we prepared to handle this kernel fault? + * We are almost certainly not prepared to handle instruction faults. */ - if (fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; /* @@ -153,8 +159,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -193,6 +197,26 @@ out: return fault; } +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) +{ + unsigned int ec = ESR_ELx_EC(esr); + unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + else + return fsc_type == ESR_ELx_FSC_PERM; +} + +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -219,19 +243,24 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) { + } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - /* - * PAN bit set implies the fault happened in kernel space, but not - * in the arch's user access functions. - */ - if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) - goto no_context; + if (addr < USER_DS && is_permission_fault(esr, regs)) { + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) + die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + + if (is_el1_instruction_abort(esr)) + die("Attempting to execute userspace memory", regs, esr); + + if (!search_exception_tables(regs->pc)) + panic("Accessing user space memory outside uaccess.h routines"); + } /* * As per x86, we may deadlock here. However, since the kernel only @@ -383,7 +412,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; @@ -545,8 +574,21 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, } #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void) +void cpu_enable_pan(void *__unused) { config_sctlr_el1(SCTLR_EL1_SPAN, 0); } #endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_UAO +/* + * Kernel threads have fs=KERNEL_DS by default, and don't need to call + * set_fs(), devtmpfs in particular relies on this behaviour. + * We need to enable the feature at runtime (instead of adding it to + * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. + */ +void cpu_enable_uao(void *__unused) +{ + asm(SET_PSTATE_UAO(1)); +} +#endif /* CONFIG_ARM64_UAO */ diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index bfb8eb168f2d..a90615baa529 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -60,14 +60,10 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) { -#ifdef CONFIG_SMP preempt_disable(); -#endif memcpy(dst, src, len); flush_ptrace_access(vma, page, uaddr, dst, len); -#ifdef CONFIG_SMP preempt_enable(); -#endif } void __sync_icache_dcache(pte_t pte, unsigned long addr) @@ -98,7 +94,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ -EXPORT_SYMBOL(flush_cache_all); EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index c49a0a8152cf..c7c9f2b289fb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -68,7 +68,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -114,19 +114,21 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) } #ifdef CONFIG_HAVE_ARCH_PFN_VALID +#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1) + int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; @@ -350,6 +352,7 @@ void __init mem_init(void) void free_initmem(void) { + fixup_init(); free_initmem_default(0); free_alternatives_memory(); } diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 4a07630a6616..cbb99c8f1e04 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - -static inline pud_t * __init early_ioremap_pud(unsigned long addr) -{ - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - - return pud_offset(pgd, addr); -} - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - pud_t *pud = early_ioremap_pud(addr); - - BUG_ON(pud_none(*pud) || pud_bad(*pud)); - - return pmd_offset(pud, addr); -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - pmd_t *pmd = early_ioremap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); -} - +/* + * Must be called after early_fixmap_init + */ void __init early_ioremap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - - pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } - early_ioremap_setup(); } - -void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else { - pte_clear(&init_mm, addr, pte); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index d519f4f50c8c..ef47d99b5cbc 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,2 +1,3 @@ extern void __init bootmem_init(void); -extern void __init arm64_swiotlb_init(void); + +void fixup_init(void); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 54922d1275b8..c9fbfabb8cb0 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -51,9 +51,14 @@ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; - if (current->flags & PF_RANDOMIZE) - rnd = (long)get_random_int() & STACK_RND_MASK; - + if (current->flags & PF_RANDOMIZE) { +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_32BIT)) + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); + else +#endif + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); + } return rnd << PAGE_SHIFT; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4f8b500f74c..6147b780f805 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -26,8 +26,11 @@ #include <linux/memblock.h> #include <linux/fs.h> #include <linux/io.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> #include <asm/cputype.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -37,87 +40,15 @@ #include "mm.h" +u64 idmap_t0sz = TCR_T0SZ(VA_BITS); + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. */ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -struct cachepolicy { - const char policy[16]; - u64 mair; - u64 tcr; -}; - -static struct cachepolicy cache_policies[] __initdata = { - { - .policy = "uncached", - .mair = 0x44, /* inner, outer non-cacheable */ - .tcr = TCR_IRGN_NC | TCR_ORGN_NC, - }, { - .policy = "writethrough", - .mair = 0xaa, /* inner, outer write-through, read-allocate */ - .tcr = TCR_IRGN_WT | TCR_ORGN_WT, - }, { - .policy = "writeback", - .mair = 0xee, /* inner, outer write-back, read-allocate */ - .tcr = TCR_IRGN_WBnWA | TCR_ORGN_WBnWA, - } -}; - -/* - * These are useful for identifying cache coherency problems by allowing the - * cache or the cache and writebuffer to be turned off. It changes the Normal - * memory caching attributes in the MAIR_EL1 register. - */ -static int __init early_cachepolicy(char *p) -{ - int i; - u64 tmp; - - for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { - int len = strlen(cache_policies[i].policy); - - if (memcmp(p, cache_policies[i].policy, len) == 0) - break; - } - if (i == ARRAY_SIZE(cache_policies)) { - pr_err("ERROR: unknown or unsupported cache policy: %s\n", p); - return 0; - } - - flush_cache_all(); - - /* - * Modify MT_NORMAL attributes in MAIR_EL1. - */ - asm volatile( - " mrs %0, mair_el1\n" - " bfi %0, %1, %2, #8\n" - " msr mair_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].mair), "i" (MT_NORMAL * 8)); - - /* - * Modify TCR PTW cacheability attributes. - */ - asm volatile( - " mrs %0, tcr_el1\n" - " bic %0, %0, %2\n" - " orr %0, %0, %1\n" - " msr tcr_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].tcr), "r" (TCR_IRGN_MASK | TCR_ORGN_MASK)); - - flush_cache_all(); - - return 0; -} -early_param("cachepolicy", early_cachepolicy); - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -132,21 +63,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -155,29 +108,40 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; - pmdval_t prot_sect; - pgprot_t prot_pte; - - if (map_io) { - prot_sect = PROT_SECT_DEVICE_nGnRE; - prot_pte = __pgprot(PROT_DEVICE_nGnRE); - } else { - prot_sect = PROT_SECT_NORMAL_EXEC; - prot_pte = PAGE_KERNEL_EXEC; - } /* * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); - pud_populate(&init_mm, pud, pmd); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } + pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -186,31 +150,51 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect)); + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. */ - if (!pmd_none(old_pmd)) + if (!pmd_none(old_pmd)) { flush_tlb_all(); + if (pmd_table(old_pmd)) { + phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); + } + } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot_pte); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(&init_mm, pgd, pud); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -221,10 +205,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, /* * For 4K granule only, attempt to put down a 1GB block */ - if (!map_io && (PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC)); + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); /* * If we have an old value for a pud, it will @@ -234,12 +218,15 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Look up the old pmd table and free it. */ if (!pud_none(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); - memblock_free(table, PAGE_SIZE); flush_tlb_all(); + if (pud_table(old_pud)) { + phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); + } } } else { - alloc_init_pmd(pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -249,9 +236,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - int map_io) +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -261,31 +249,94 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, prot, early_alloc); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + late_alloc); } -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { - if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { - pr_warn("BUG: not creating id mapping for %pa\n", &addr); + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); return; } - __create_mapping(&idmap_pg_dir[pgd_index(addr)], - addr, addr, size, map_io); + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); +} + +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); } +#endif static void __init map_mem(void) { @@ -331,53 +382,70 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. */ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)__init_begin - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. */ void __init paging_init(void) { - void *zero_page; - map_mem(); - - /* - * Finally flush the caches and tlb to ensure that we're in a - * consistent state. - */ - flush_cache_all(); - flush_tlb_all(); - - /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); + fixup_executable(); bootmem_init(); - empty_zero_page = virt_to_page(zero_page); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); -} - -/* - * Enable the identity mapping to allow the MMU disabling. - */ -void setup_mm_for_reboot(void) -{ - cpu_switch_mm(idmap_pg_dir, &init_mm); - flush_tlb_all(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); } /* @@ -463,3 +531,96 @@ void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not preparted: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S deleted file mode 100644 index 005d29e2977d..000000000000 --- a/arch/arm64/mm/proc-macros.S +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Based on arch/arm/mm/proc-macros.S - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register. - */ - .macro dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm \tmp, \tmp, #16, #19 // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register. - */ - .macro icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 302353d9150b..ce2c71a52d05 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,22 +23,15 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hwcap.h> -#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include "proc-macros.S" - #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K #else #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif -#ifdef CONFIG_SMP #define TCR_SMP_FLAGS TCR_SHARED -#else -#define TCR_SMP_FLAGS 0 -#endif /* PTWs cacheable, inner/outer WBWA */ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA @@ -46,52 +39,6 @@ #define MAIR(attr, mt) ((attr) << ((mt) * 8)) /* - * cpu_cache_off() - * - * Turn the CPU D-cache off. - */ -ENTRY(cpu_cache_off) - mrs x0, sctlr_el1 - bic x0, x0, #1 << 2 // clear SCTLR.C - msr sctlr_el1, x0 - isb - ret -ENDPROC(cpu_cache_off) - -/* - * cpu_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the same state - * as it would be if it had been reset, and branch to what would be the - * reset vector. It must be executed with the flat identity mapping. - * - * - loc - location to jump to for soft reset - */ - .align 5 -ENTRY(cpu_reset) - mrs x1, sctlr_el1 - bic x1, x1, #1 - msr sctlr_el1, x1 // disable the MMU - isb - ret x0 -ENDPROC(cpu_reset) - -ENTRY(cpu_soft_restart) - /* Save address of cpu_reset() and reset address */ - mov x19, x0 - mov x20, x1 - - /* Turn D-cache off */ - bl cpu_cache_off - - /* Push out all dirty data, and ensure cache is empty */ - bl flush_cache_all - - mov x0, x20 - ret x19 -ENDPROC(cpu_soft_restart) - -/* * cpu_do_idle() * * Idle the processor (wait for interrupt). @@ -156,6 +103,7 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 msr ttbr0_el1, x1 msr ttbr1_el1, x7 + tcr_set_idmap_t0sz x8, x7 msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 @@ -179,7 +127,7 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - mmid w1, x1 // get mm->context.id + mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb @@ -195,13 +143,13 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - ic iallu // I+BTB cache invalidate - tlbi vmalle1is // invalidate I + D TLBs - dsb ish + tlbi vmalle1 // Invalidate local TLB + dsb nsh mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD - msr mdscr_el1, xzr // Reset mdscr_el1 + mov x0, #1 << 12 // Reset mdscr_el1 and disable + msr mdscr_el1, x0 // access to the DCC from EL0 isb // Unmask debug exceptions now, enable_dbg // since this is per-cpu /* @@ -235,6 +183,8 @@ ENTRY(__cpu_setup) */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + tcr_set_idmap_t0sz x10, x9 + /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in * TCR_EL1. diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 8bbe9401f4f0..b96db5dafec4 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,6 +49,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/uaccess.h> #include <xen/interface/xen.h> @@ -89,6 +90,20 @@ ENTRY(privcmd_call) mov x2, x3 mov x3, x4 mov x4, x5 + /* + * Privcmd calls are issued by the userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN disabled). + */ + uaccess_ttbr0_enable x6, x7 hvc XEN_IMM + + /* + * Disable userspace access from kernel once the hyp call completed. + */ + uaccess_ttbr0_disable x6 ret ENDPROC(privcmd_call); |