diff options
Diffstat (limited to 'arch')
80 files changed, 498 insertions, 532 deletions
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index f03a091cd076..dfcc0dd637e5 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -116,7 +116,7 @@ ranges = <0 0x2000 0x2000>; scm_conf: scm_conf@0 { - compatible = "syscon"; + compatible = "syscon", "simple-bus"; reg = <0x0 0x1400>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7..e6540b5cfa4c 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index f74a8ded515f..38c786018a09 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -153,10 +153,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_PCIE_AXI>, <&clks IMX6QDL_CLK_LVDS1_GATE>, <&clks IMX6QDL_CLK_PCIE_REF_125M>; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0..d56d68fe7ffc 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c0067..af9b7190533a 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f073..ef8464bb11ff 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index 11a7963be003..2390f387c271 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -51,7 +51,8 @@ }; scm_conf: scm_conf@270 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x270 0x240>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e..84be9da74c7e 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -191,7 +191,8 @@ }; omap4_padconf_global: omap4_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0x170>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b5..874a26f9dc0f 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -180,7 +180,8 @@ }; omap5_padconf_global: omap5_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0xec>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index 4c38674c73ec..54d274da7ccb 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -43,5 +43,5 @@ obj-$(CONFIG_ARCH_BCM_63XX) := bcm63xx.o ifeq ($(CONFIG_ARCH_BRCMSTB),y) CFLAGS_platsmp-brcmstb.o += -march=armv7-a obj-y += brcmstb.o -obj-$(CONFIG_SMP) += headsmp-brcmstb.o platsmp-brcmstb.o +obj-$(CONFIG_SMP) += platsmp-brcmstb.o endif diff --git a/arch/arm/mach-bcm/brcmstb.h b/arch/arm/mach-bcm/brcmstb.h deleted file mode 100644 index ec0c3d112b36..000000000000 --- a/arch/arm/mach-bcm/brcmstb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __BRCMSTB_H__ -#define __BRCMSTB_H__ - -void brcmstb_secondary_startup(void); - -#endif /* __BRCMSTB_H__ */ diff --git a/arch/arm/mach-bcm/headsmp-brcmstb.S b/arch/arm/mach-bcm/headsmp-brcmstb.S deleted file mode 100644 index 199c1ea58248..000000000000 --- a/arch/arm/mach-bcm/headsmp-brcmstb.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SMP boot code for secondary CPUs - * Based on arch/arm/mach-tegra/headsmp.S - * - * Copyright (C) 2010 NVIDIA, Inc. - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include <asm/assembler.h> -#include <linux/linkage.h> -#include <linux/init.h> - - .section ".text.head", "ax" - -ENTRY(brcmstb_secondary_startup) - /* - * Ensure CPU is in a sane state by disabling all IRQs and switching - * into SVC mode. - */ - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r0 - - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(brcmstb_secondary_startup) diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index e209e6fc7caf..44d6bddf7a4e 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -30,8 +30,6 @@ #include <asm/mach-types.h> #include <asm/smp_plat.h> -#include "brcmstb.h" - enum { ZONE_MAN_CLKEN_MASK = BIT(0), ZONE_MAN_RESET_CNTL_MASK = BIT(1), @@ -153,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(brcmstb_secondary_startup)); + cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S index 4a4c56a58ad3..dc82a3486b05 100644 --- a/arch/arm/mach-berlin/headsmp.S +++ b/arch/arm/mach-berlin/headsmp.S @@ -12,12 +12,6 @@ #include <linux/init.h> #include <asm/assembler.h> -ENTRY(berlin_secondary_startup) - ARM_BE8(setend be) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(berlin_secondary_startup) - /* * If the following instruction is set in the reset exception vector, CPUs * will fetch the value of the software reset address vector when being diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 702e7982015a..34a3753e7356 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -22,7 +22,6 @@ #define RESET_VECT 0x00 #define SW_RESET_ADDR 0x94 -extern void berlin_secondary_startup(void); extern u32 boot_inst; static void __iomem *cpu_ctrl; @@ -85,7 +84,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. */ - writel(virt_to_phys(berlin_secondary_startup), vectors_base + SW_RESET_ADDR); + writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile index 6b7b3033de0b..659db1933ed3 100644 --- a/arch/arm/mach-hisi/Makefile +++ b/arch/arm/mach-hisi/Makefile @@ -6,4 +6,4 @@ CFLAGS_platmcpm.o := -march=armv7-a obj-y += hisilicon.o obj-$(CONFIG_MCPM) += platmcpm.o -obj-$(CONFIG_SMP) += platsmp.o hotplug.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o hotplug.o diff --git a/arch/arm/mach-hisi/core.h b/arch/arm/mach-hisi/core.h index 92a682d8e939..c7648ef1825c 100644 --- a/arch/arm/mach-hisi/core.h +++ b/arch/arm/mach-hisi/core.h @@ -12,7 +12,6 @@ extern void hi3xxx_cpu_die(unsigned int cpu); extern int hi3xxx_cpu_kill(unsigned int cpu); extern void hi3xxx_set_cpu(int cpu, bool enable); -extern void hisi_secondary_startup(void); extern struct smp_operations hix5hd2_smp_ops; extern void hix5hd2_set_cpu(int cpu, bool enable); extern void hix5hd2_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-hisi/headsmp.S b/arch/arm/mach-hisi/headsmp.S deleted file mode 100644 index 81e35b159e75..000000000000 --- a/arch/arm/mach-hisi/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2014 Hisilicon Limited. - * Copyright (c) 2014 Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> - - __CPUINIT - -ENTRY(hisi_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index 8880c8e8b296..51744127db66 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index de5047c8a6c8..b5e976816b63 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -25,7 +25,6 @@ diag_reg_offset: .endm ENTRY(v7_secondary_startup) - bl v7_invalidate_l1 set_diag_reg b secondary_startup ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 08d5ed46b996..48e4c4b3cd1c 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -21,7 +21,6 @@ ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) - bl v7_invalidate_l1 bl armada_38x_scu_power_up b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 3b56722dfd8a..6833df45d7b1 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 752969ff9de0..5286e7773ed4 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. + * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S index d86fe33c5f53..209d9fc5c16c 100644 --- a/arch/arm/mach-prima2/headsmp.S +++ b/arch/arm/mach-prima2/headsmp.S @@ -15,7 +15,6 @@ * ready for them to initialise. */ ENTRY(sirfsoc_secondary_startup) - bl v7_invalidate_l1 mrc p15, 0, r0, c0, c0, 5 and r0, r0, #15 adr r4, 1f diff --git a/arch/arm/mach-rockchip/core.h b/arch/arm/mach-rockchip/core.h index 39bca96b555a..492c048813da 100644 --- a/arch/arm/mach-rockchip/core.h +++ b/arch/arm/mach-rockchip/core.h @@ -17,4 +17,3 @@ extern char rockchip_secondary_trampoline; extern char rockchip_secondary_trampoline_end; extern unsigned long rockchip_boot_fn; -extern void rockchip_secondary_startup(void); diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S index 46c22dedf632..d69708b07282 100644 --- a/arch/arm/mach-rockchip/headsmp.S +++ b/arch/arm/mach-rockchip/headsmp.S @@ -15,14 +15,6 @@ #include <linux/linkage.h> #include <linux/init.h> -ENTRY(rockchip_secondary_startup) - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - ldr r1, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r1 - beq v7_invalidate_l1 - b secondary_startup -ENDPROC(rockchip_secondary_startup) - ENTRY(rockchip_secondary_trampoline) ldr pc, 1f ENDPROC(rockchip_secondary_trampoline) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 5b4ca3c3c879..2e6ab67e2284 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -149,8 +149,7 @@ static int __cpuinit rockchip_boot_secondary(unsigned int cpu, * sram_base_addr + 8: start address for pc * */ udelay(10); - writel(virt_to_phys(rockchip_secondary_startup), - sram_base_addr + 8); + writel(virt_to_phys(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } @@ -189,7 +188,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(rockchip_secondary_startup); + rockchip_boot_fn = virt_to_phys(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index afc60bad6fd6..476092b86c6e 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -14,7 +14,6 @@ extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); extern int shmobile_smp_cpu_disable(unsigned int cpu); -extern void shmobile_invalidate_start(void); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S index 69df8bfac167..fa5248c52399 100644 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ b/arch/arm/mach-shmobile/headsmp-scu.S @@ -22,7 +22,7 @@ * Boot code for secondary CPUs. * * First we turn on L1 cache coherency for our CPU. Then we jump to - * shmobile_invalidate_start that invalidates the cache and hands over control + * secondary_startup that invalidates the cache and hands over control * to the common ARM startup code. */ ENTRY(shmobile_boot_scu) @@ -36,7 +36,7 @@ ENTRY(shmobile_boot_scu) bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) str r2, [r0, #8] @ write back - b shmobile_invalidate_start + b secondary_startup ENDPROC(shmobile_boot_scu) .text diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 50c491567e11..330c1fc63197 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -16,13 +16,6 @@ #include <asm/assembler.h> #include <asm/memory.h> -#ifdef CONFIG_SMP -ENTRY(shmobile_invalidate_start) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(shmobile_invalidate_start) -#endif - /* * Reset vector for secondary CPUs. * This will be mapped at address 0 by SBAR register. diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index f483b560b066..b0790fc32282 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -133,7 +133,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_invalidate_start), 0); + shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index a0f3b1cd497c..767c09e954a0 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -31,7 +31,6 @@ #define RSTMGR_MPUMODRST_CPU1 0x2 /* CPU1 Reset */ -extern void socfpga_secondary_startup(void); extern void __iomem *socfpga_scu_base_addr; extern void socfpga_init_clocks(void); diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index f65ea0af4af3..5bb016427107 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -30,8 +30,3 @@ ENTRY(secondary_trampoline) 1: .long . .long socfpga_cpu1start_addr ENTRY(secondary_trampoline_end) - -ENTRY(socfpga_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(socfpga_secondary_startup) diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index c64d89b7c0ca..79c5336c569f 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(socfpga_secondary_startup), + writel(virt_to_phys(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index e48a74458c25..fffad2426ee4 100644 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += pm-tegra30.o ifeq ($(CONFIG_CPU_IDLE),y) obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuidle-tegra30.o endif -obj-$(CONFIG_SMP) += platsmp.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_ARCH_TEGRA_114_SOC) += sleep-tegra30.o diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S deleted file mode 100644 index 2072e7322c39..000000000000 --- a/arch/arm/mach-tegra/headsmp.S +++ /dev/null @@ -1,12 +0,0 @@ -#include <linux/linkage.h> -#include <linux/init.h> - -#include "sleep.h" - - .section ".text.head", "ax" - -ENTRY(tegra_secondary_startup) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - b secondary_startup -ENDPROC(tegra_secondary_startup) diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 894c5c472184..6fd9db54887e 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,7 +94,7 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)tegra_secondary_startup); + virt_to_phys((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 29c3dec0126a..9c479c7925b8 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -37,7 +37,6 @@ void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); -void tegra_secondary_startup(void); #ifdef CONFIG_PM_SLEEP #define tegra_cpu_lp1_mask \ diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index 382c60e9aa16..7038cae95ddc 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,8 +17,6 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ -void zynq_secondary_startup(void); - extern int zynq_slcr_init(void); extern int zynq_early_slcr_init(void); extern void zynq_slcr_system_reset(void); diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S index dd8c071941e7..045c72720a4d 100644 --- a/arch/arm/mach-zynq/headsmp.S +++ b/arch/arm/mach-zynq/headsmp.S @@ -22,8 +22,3 @@ zynq_secondary_trampoline_jump: .globl zynq_secondary_trampoline_end zynq_secondary_trampoline_end: ENDPROC(zynq_secondary_trampoline) - -ENTRY(zynq_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zynq_secondary_startup) diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index 52d768ff7857..f66816c49186 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -87,10 +87,9 @@ int zynq_cpun_start(u32 address, int cpu) } EXPORT_SYMBOL(zynq_cpun_start); -static int zynq_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(zynq_secondary_startup), cpu); + return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); } /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3d1054f11a8a..7911f14c2157 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -268,7 +268,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -277,7 +280,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -335,10 +338,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_louis - ldmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} +__v7_setup_cont: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? teq r10, #0x41000000 @@ -460,7 +464,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 12 registers __INITDATA diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa791051029..1160434eece0 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2d13aef4ac42..87de4e03ef51 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1329,7 +1329,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1366,6 +1366,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d26fcd4cd6e6..c0cff3410166 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitely for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; @@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f02530e726f6..85c57158dcd9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, false, addr); - - inject_abt64(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); } /** @@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, true, addr); - - inject_abt64(vcpu, true, addr); + else + inject_abt64(vcpu, true, addr); } /** @@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_undef32(vcpu); - - inject_undef64(vcpu); + else + inject_undef64(vcpu); } diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 7fc8397d16f2..fd2a36a79f97 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -186,6 +186,7 @@ int get_c0_perfcount_int(void) { return ATH79_MISC_IRQ(5); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h deleted file mode 100644 index 11d3b572b1b3..000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H -#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H - -#include <asm/bmips.h> - -#define plat_post_dma_flush bmips_post_dma_flush - -#include <asm/mach-generic/dma-coherence.h> - -#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 819af9d057a8..70f6e7f073b0 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ +#ifdef CONFIG_64BIT +#define LL_INSN "lld" +#define SC_INSN "scd" +#else /* CONFIG_32BIT */ +#define LL_INSN "ll" +#define SC_INSN "sc" +#endif + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + " .set push\n" + " .set noreorder\n" + "1: " LL_INSN " %[tmp], %[buddy]\n" + " bnez %[tmp], 2f\n" + " or %[tmp], %[tmp], %[global]\n" + " " SC_INSN " %[tmp], %[buddy]\n" + " beqz %[tmp], 1b\n" + " nop\n" + "2:\n" + " .set pop" + : [buddy] "+m" (buddy->pte), + [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } #endif } diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 28d6d9364bd1..a71da576883c 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -152,6 +152,31 @@ .set noreorder bltz k0, 8f move k1, sp +#ifdef CONFIG_EVA + /* + * Flush interAptiv's Return Prediction Stack (RPS) by writing + * EntryHi. Toggling Config7.RPS is slower and less portable. + * + * The RPS isn't automatically flushed when exceptions are + * taken, which can result in kernel mode speculative accesses + * to user addresses if the RPS mispredicts. That's harmless + * when user and kernel share the same address space, but with + * EVA the same user segments may be unmapped to kernel mode, + * even containing sensitive MMIO regions or invalid memory. + * + * This can happen when the kernel sets the return address to + * ret_from_* and jr's to the exception handler, which looks + * more like a tail call than a function call. If nested calls + * don't evict the last user address in the RPS, it will + * mispredict the return and fetch from a user controlled + * address into the icache. + * + * More recent EVA-capable cores with MAAR to restrict + * speculative accesses aren't affected. + */ + MFC0 k0, CP0_ENTRYHI + MTC0 k0, CP0_ENTRYHI +#endif .set reorder /* Called from user mode, new stack. */ get_saved_sp diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 3e4491aa6d6b..789d7bf4fef3 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { unsigned int real_len; - cpumask_t mask; + cpumask_t allowed, mask; int retval; struct task_struct *p; @@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: read_unlock(&tasklist_lock); diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index 74bab9ddd0e1..c6bbf2165051 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -24,7 +24,7 @@ LEAF(relocate_new_kernel) process_entry: PTR_L s2, (s0) - PTR_ADD s0, s0, SZREG + PTR_ADDIU s0, s0, SZREG /* * In case of a kdump/crash kernel, the indirection page is not @@ -61,9 +61,9 @@ copy_word: /* copy page word by word */ REG_L s5, (s2) REG_S s5, (s4) - PTR_ADD s4, s4, SZREG - PTR_ADD s2, s2, SZREG - LONG_SUB s6, s6, 1 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word b process_entry diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index ad4d44635c76..a6f6b762c47a 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -80,7 +80,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_64_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 446cc654da56..4b2010654c46 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -72,7 +72,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_N32_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a01..5d7f2634996f 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d2d1c1933bc9..5f5f44edc77d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task, void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; + mm_segment_t old_fs = get_fs(); if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; @@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp) prepare_frametrace(®s); } } + /* + * show_stack() deals exclusively with kernel mode, so be sure to access + * the stack in the kernel (not user) address space. + */ + set_fs(KERNEL_DS); show_stacktrace(task, ®s); + set_fs(old_fs); } static void show_code(unsigned int __user *pc) @@ -1518,6 +1525,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); int multi_match = regs->cp0_status & ST0_TS; enum ctx_state prev_state; + mm_segment_t old_fs = get_fs(); prev_state = exception_enter(); show_regs(regs); @@ -1539,8 +1547,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs) dump_tlb_all(); } + if (!user_mode(regs)) + set_fs(KERNEL_DS); + show_code((unsigned int __user *) regs->cp0_epc); + set_fs(old_fs); + /* * Some chips may have other causes of machine check (e.g. SB1 * graduation timer) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index af84bef0c90d..eb3efd137fd1 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -438,7 +438,7 @@ do { \ : "memory"); \ } while(0) -#define StoreDW(addr, value, res) \ +#define _StoreDW(addr, value, res) \ do { \ __asm__ __volatile__ ( \ ".set\tpush\n\t" \ diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 6ab10573490d..d01ade63492f 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -466,6 +466,7 @@ int get_c0_perfcount_int(void) { return ltq_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..a7f7d9ffb402 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -148,6 +148,7 @@ int get_c0_perfcount_int(void) return mips_cpu_perf_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { @@ -165,14 +166,17 @@ unsigned int get_c0_compare_int(void) static void __init init_rtc(void) { - /* stop the clock whilst setting it up */ - CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL); + unsigned char freq, ctrl; - /* 32KHz time base */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); + /* Set 32KHz time base if not already set */ + freq = CMOS_READ(RTC_FREQ_SELECT); + if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ) + CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); - /* start the clock */ - CMOS_WRITE(RTC_24H, RTC_CONTROL); + /* Ensure SET bit is clear so RTC can run */ + ctrl = CMOS_READ(RTC_CONTROL); + if (ctrl & RTC_SET) + CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL); } void __init plat_time_init(void) diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index e1d69895fb1d..a120b7a5a8fe 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -77,6 +77,7 @@ int get_c0_perfcount_int(void) return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; return -1; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..ab73f6f405bb 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -26,6 +26,7 @@ int get_c0_perfcount_int(void) { return gic_get_c0_perfcount_int(); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); void __init plat_time_init(void) { diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 7cf91b92e9d1..199ace4ca1ad 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -89,6 +89,7 @@ int get_c0_perfcount_int(void) { return rt_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f92..da50e0c9c57e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 1f0aa2024e94..6424249d5f78 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -28,16 +28,10 @@ * Must preserve %o5 between VISEntryHalf and VISExitHalf */ #define VISEntryHalf \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,pt %icc, 297f; \ - sethi %hi(298f), %g7; \ - sethi %hi(VISenterhalf), %g1; \ - jmpl %g1 + %lo(VISenterhalf), %g0; \ - or %g7, %lo(298f), %g7; \ - clr %o5; \ -297: wr %o5, FPRS_FEF, %fprs; \ -298: + VISEntry + +#define VISExitHalf \ + VISExit #define VISEntryHalfFast(fail_label) \ rd %fprs, %o5; \ @@ -47,7 +41,7 @@ ba,a,pt %xcc, fail_label; \ 297: wr %o5, FPRS_FEF, %fprs; -#define VISExitHalf \ +#define VISExitHalfFast \ wr %o5, 0, %fprs; #ifndef __ASSEMBLY__ diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 140527a20e7d..83aeeb1dffdb 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -240,8 +240,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e..a063d84336d6 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb %o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1d649a95660c..8069ce12f20b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cd..c667e104a0c2 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c998..9dfce4e0417d 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd3..d8b9f9081e86 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28f..cde732c1b495 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1424,7 +1424,7 @@ static inline void __x2apic_disable(void) { u64 msr; - if (cpu_has_apic) + if (!cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1483,10 +1483,13 @@ void x2apic_setup(void) static __init void x2apic_disable(void) { - u32 x2apic_id; + u32 x2apic_id, state = x2apic_state; - if (x2apic_state != X2APIC_ON) - goto out; + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; + + if (state != X2APIC_ON) + return; x2apic_id = read_apic_id(); if (x2apic_id >= 255) @@ -1494,9 +1497,6 @@ static __init void x2apic_disable(void) __x2apic_disable(); register_lapic_address(mp_lapic_addr); -out: - x2apic_state = X2APIC_DISABLED; - x2apic_mode = 0; } static __init void x2apic_enable(void) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9bf90702036a..8a3445bdf6a7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -809,8 +809,6 @@ do_preempt_schedule_irq: restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 - -irq_return: INTERRUPT_RETURN ENTRY(native_iret) @@ -1431,11 +1429,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1444,32 +1443,151 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as our temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + + SWAPGS + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ /* - * Check the special variable on the stack to see if NMIs are - * executing. + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + jmp restore_c_regs_and_iret + +.Lnmi_from_kernel: + /* + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + + /* + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. + */ + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: + + /* + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1480,25 +1598,21 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ jb first_nmi - /* Ah, it is within the NMI stack, treat it as nested */ + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ CFI_REMEMBER_STATE nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1517,60 +1631,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1578,6 +1655,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1586,16 +1664,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1606,9 +1689,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK @@ -1623,29 +1706,11 @@ end_repeat_nmi: call paranoid_entry DEFAULT_FRAME 0 - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: @@ -1653,12 +1718,27 @@ nmi_swapgs: nmi_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS - /* Pop the extra iret frame at once */ + + /* Point RSP at the "iret" frame. */ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) - jmp irq_return + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ + INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751c..d05bd2e2ee91 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -461,49 +460,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. + * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc0..971743774248 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static void mwait_idle(void) { if (!current_set_polling_and_test()) { + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -464,6 +465,7 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 74c44c4f0b2f..12c28f79e2e7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -450,19 +456,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e7..c4ea87eedf8a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -150,7 +150,7 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return vcpu->arch.apic->pending_events; + return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e88fda867a33..484145368a24 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,7 +8,7 @@ config XEN select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_TSC + depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -17,7 +17,7 @@ config XEN config XEN_DOM0 def_bool y depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + depends on X86_IO_APIC && ACPI && PCI config XEN_PVHVM def_bool y diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337a..4b6e29ac0968 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..a671e837228d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..bef30cbb56c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else |