diff options
Diffstat (limited to 'arch')
246 files changed, 12560 insertions, 1960 deletions
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a177365b2c4..7991e08d606b 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,9 +157,8 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", __func__, start, end); + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 18a9f5ef643a..1e9cca81eeac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -474,6 +474,7 @@ config ARCH_IXP4XX bool "IXP4xx-based" depends on MMU select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_REQUIRE_GPIOLIB select CLKSRC_MMIO select CPU_XSCALE @@ -1494,6 +1495,109 @@ config SCHED_SMT MultiThreading at a cost of slightly increased overhead in some places. If unsure say N here. +config DISABLE_CPU_SCHED_DOMAIN_BALANCE + bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing" + help + Disables scheduler load-balancing at CPU sched domain level. + +config SCHED_HMP + bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling" + depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP + help + Experimental scheduler optimizations for heterogeneous platforms. + Attempts to introspectively select task affinity to optimize power + and performance. Basic support for multiple (>2) cpu types is in place, + but it has only been tested with two types of cpus. + There is currently no support for migration of task groups, hence + !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled + between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE). + When turned on, this option adds sys/kernel/hmp directory which + contains the following files: + up_threshold - the load average threshold used for up migration + (0 - 1023) + down_threshold - the load average threshold used for down migration + (0 - 1023) + hmp_domains - a list of cpumasks for the present HMP domains, + starting with the 'biggest' and ending with the + 'smallest'. + Note that both the threshold files can be written at runtime to + control scheduler behaviour. + +config SCHED_HMP_PRIO_FILTER + bool "(EXPERIMENTAL) Filter HMP migrations by task priority" + depends on SCHED_HMP + help + Enables task priority based HMP migration filter. Any task with + a NICE value above the threshold will always be on low-power cpus + with less compute capacity. + +config SCHED_HMP_PRIO_FILTER_VAL + int "NICE priority threshold" + default 5 + depends on SCHED_HMP_PRIO_FILTER + +config HMP_FAST_CPU_MASK + string "HMP scheduler fast CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the fast CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_SLOW_CPU_MASK + string "HMP scheduler slow CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the slow CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_VARIABLE_SCALE + bool "Allows changing the load tracking scale through sysfs" + depends on SCHED_HMP + help + When turned on, this option exports the load average period value + for the load tracking patches through sysfs. + The values can be modified to change the rate of load accumulation + used for HMP migration. 'load_avg_period_ms' is the time in ms to + reach a load average of 0.5 for an idle task of 0 load average + ratio which becomes 100% busy. + For example, with load_avg_period_ms = 128 and up_threshold = 512, + a running task with a load of 0 will be migrated to a bigger CPU after + 128ms, because after 128ms its load_avg_ratio is 0.5 and the real + up_threshold is 0.5. + This patch has the same behavior as changing the Y of the load + average computation to + (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms) + but removes intermediate overflows in computation. + +config HMP_FREQUENCY_INVARIANT_SCALE + bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP" + depends on SCHED_HMP && CPU_FREQ + help + Scales the current load contribution in line with the frequency + of the CPU that the task was executed on. + In this version, we use a simple linear scale derived from the + maximum frequency reported by CPUFreq. + Restricting tracked load to be scaled by the CPU's frequency + represents the consumption of possible compute capacity + (rather than consumption of actual instantaneous capacity as + normal) and allows the HMP migration's simple threshold + migration strategy to interact more predictably with CPUFreq's + asynchronous compute capacity changes. + +config SCHED_HMP_LITTLE_PACKING + bool "Small task packing for HMP" + depends on SCHED_HMP + default n + help + Allows the HMP Scheduler to pack small tasks into CPUs in the + smallest HMP domain. + Controlled by two sysfs files in sys/kernel/hmp. + packing_enable: 1 to enable, 0 to disable packing. Default 1. + packing_limit: runqueue load ratio where a RQ is considered + to be full. Default is NICE_0_LOAD * 9/8. + config HAVE_ARM_SCU bool help @@ -1521,6 +1625,31 @@ config MCPM for (multi-)cluster based systems, such as big.LITTLE based systems. +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support for the big.LITTLE architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1ba358ba16b8..70bc19e2274f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,6 +16,7 @@ LDFLAGS := LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 +LDFLAGS_MODULE += --be8 endif OBJCOPYFLAGS :=-O binary -R .comment -S diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 032a8d987148..f6e34be012ff 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -135,6 +135,7 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 mrs r9, cpsr #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly @@ -679,9 +680,7 @@ __armv4_mmu_cache_on: mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x0030 -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables bl __common_mmu_cache_on mov r0, #0 mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs @@ -708,9 +707,7 @@ __armv7_mmu_cache_on: orr r0, r0, #1 << 22 @ U (v6 unaligned access model) @ (needed for ARM1176) #ifdef CONFIG_MMU -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index f0895c581a89..00baf9f5766a 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -202,7 +202,14 @@ dtb-$(CONFIG_ARCH_VERSATILE) += versatile-ab.dtb \ dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \ vexpress-v2p-ca9.dtb \ vexpress-v2p-ca15-tc1.dtb \ - vexpress-v2p-ca15_a7.dtb + vexpress-v2p-ca15_a7.dtb \ + rtsm_ve-cortex_a9x2.dtb \ + rtsm_ve-cortex_a9x4.dtb \ + rtsm_ve-cortex_a15x1.dtb \ + rtsm_ve-cortex_a15x2.dtb \ + rtsm_ve-cortex_a15x4.dtb \ + rtsm_ve-v2p-ca15x1-ca7x1.dtb \ + rtsm_ve-v2p-ca15x4-ca7x4.dtb dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \ wm8505-ref.dtb \ diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi new file mode 100644 index 000000000000..0b0ff6ead4b2 --- /dev/null +++ b/arch/arm/boot/dts/clcd-panels.dtsi @@ -0,0 +1,52 @@ +/* + * ARM Ltd. Versatile Express + * + */ + +/ { + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts new file mode 100644 index 000000000000..c9eee916aa7e --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts @@ -0,0 +1,159 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x1CT + * + * RTSM_VE_Cortex_A15x1.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts new file mode 100644 index 000000000000..853a166e3c32 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts @@ -0,0 +1,165 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x2CT + * + * RTSM_VE_Cortex_A15x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts new file mode 100644 index 000000000000..c1947a3a5c88 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts @@ -0,0 +1,177 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * + * RTSM_VE_Cortex_A15x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts new file mode 100644 index 000000000000..fca6b2f79677 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts @@ -0,0 +1,171 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx2CT + * + * RTSM_VE_Cortex_A9x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts new file mode 100644 index 000000000000..fd8a6ed97a04 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts @@ -0,0 +1,183 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx4CT + * + * RTSM_VE_Cortex_A9x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi new file mode 100644 index 000000000000..a2d895ee5faa --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi @@ -0,0 +1,231 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * Motherboard component + * + * VEMotherBoard.lisa + */ + + motherboard { + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + arm,hbi = <0x190>; + arm,vexpress,site = <0>; + arm,v2m-memory-map = "rs1"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + flash@0,00000000 { + compatible = "arm,vexpress-flash", "cfi-flash"; + reg = <0 0x00000000 0x04000000>, + <4 0x00000000 0x04000000>; + bank-width = <4>; + }; + + vram@2,00000000 { + compatible = "arm,vexpress-vram"; + reg = <2 0x00000000 0x00800000>; + }; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + }; + + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + + aaci@040000 { + compatible = "arm,pl041", "arm,primecell"; + reg = <0x040000 0x1000>; + interrupts = <11>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + mmci@050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x050000 0x1000>; + interrupts = <9 10>; + cd-gpios = <&v2m_sysreg 0 0>; + wp-gpios = <&v2m_sysreg 1 0>; + max-frequency = <12000000>; + vmmc-supply = <&v2m_fixed_3v3>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "mclk", "apb_pclk"; + }; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <12>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <13>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x1000>; + interrupts = <0>; + clocks = <&v2m_refclk32khz>, <&smbclk>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x1000>; + interrupts = <2>; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x1000>; + interrupts = <3>; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x1000>; + interrupts = <4>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + clcd@1f0000 { + compatible = "arm,pl111", "arm,primecell"; + reg = <0x1f0000 0x1000>; + interrupts = <14>; + clocks = <&v2m_oscclk1>, <&smbclk>; + clock-names = "v2m:oscclk1", "apb_pclk"; + mode = "VGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + + virtio_block@0130000 { + compatible = "virtio,mmio"; + reg = <0x130000 0x200>; + interrupts = <42>; + }; + + }; + + v2m_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + mcc { + compatible = "simple-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + v2m_oscclk1: osc@1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 63500000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + muxfpga@0 { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown@0 { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + }; + }; diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts new file mode 100644 index 000000000000..fe8cf5dc8570 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts @@ -0,0 +1,244 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x1_A7x1.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x1-A7x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core1: core@0 { + reg = <0>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core1>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts new file mode 100644 index 000000000000..f715285131d8 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts @@ -0,0 +1,358 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x4_A7x4.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x4-A7x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + core2: core@2 { + reg = <2>; + }; + + core3: core@3 { + reg = <3>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core4: core@0 { + reg = <0>; + }; + + core5: core@1 { + reg = <1>; + }; + + core6: core@2 { + reg = <2>; + }; + + core7: core@3 { + reg = <3>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + cluster = <&cluster0>; + core = <&core2>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + cluster = <&cluster0>; + core = <&core3>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu4: cpu@4 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core4>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu5: cpu@5 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + cluster = <&cluster1>; + core = <&core5>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu6: cpu@6 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x102>; + cluster = <&cluster1>; + core = <&core6>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu7: cpu@7 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x103>; + cluster = <&cluster1>; + core = <&core7>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + gic-cpuif@5 { + compatible = "arm,gic-cpuif"; + cpuif-id = <5>; + cpu = <&cpu5>; + }; + gic-cpuif@6 { + compatible = "arm,gic-cpuif"; + cpuif-id = <6>; + cpu = <&cpu6>; + }; + gic-cpuif@7 { + compatible = "arm,gic-cpuif"; + cpuif-id = <7>; + cpu = <&cpu7>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index ac870fb3fa0d..9584232ee6b6 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -228,6 +228,7 @@ }; clcd@1f0000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f0000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index f1420368355b..6593398c11ae 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -227,6 +227,7 @@ }; clcd@1f000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts index 9420053acc14..cc6a8c0cfe33 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA15"; arm,hbi = <0x237>; @@ -57,6 +59,8 @@ interrupts = <0 85 4>; clocks = <&oscclk5>; clock-names = "pxlclk"; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; }; memory-controller@2b0a0000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index d2803be4e1a8..f1dc620c5c45 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -9,11 +9,13 @@ /dts-v1/; +/memreserve/ 0xff000000 0x01000000; + / { model = "V2P-CA15_CA7"; arm,hbi = <0x249>; arm,vexpress,site = <0xf>; - compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress"; + compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic"; interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; @@ -29,44 +31,106 @@ i2c1 = &v2m_i2c_pcie; }; - cpus { + clusters { #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster0: cluster@0 { reg = <0>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + }; }; - cpu1: cpu@1 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster1: cluster@1 { reg = <1>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core2: core@0 { + reg = <0>; + }; + + core3: core@1 { + reg = <1>; + }; + + core4: core@2 { + reg = <2>; + }; + }; }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x100>; + cluster = <&cluster1>; + core = <&core2>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; }; cpu3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x101>; + cluster = <&cluster1>; + core = <&core3>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; }; cpu4: cpu@4 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x102>; + cluster = <&cluster1>; + core = <&core4>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; + clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; + clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; }; }; memory@80000000 { device_type = "memory"; - reg = <0 0x80000000 0 0x40000000>; + reg = <0 0x80000000 0 0x80000000>; }; wdt@2a490000 { @@ -81,6 +145,8 @@ compatible = "arm,hdlcd"; reg = <0 0x2b000000 0 0x1000>; interrupts = <0 85 4>; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; clocks = <&oscclk5>; clock-names = "pxlclk"; }; @@ -102,6 +168,64 @@ <0 0x2c004000 0 0x2000>, <0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + }; + + cci@2c090000 { + compatible = "arm,cci-400"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + cci-pmu@2c099000 { + compatible = "arm,cci-400-pmu"; + reg = <0 0x2c099000 0 0x6000>; + interrupts = <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>, + <0 105 4>; }; memory-controller@7ffd0000 { @@ -125,6 +249,12 @@ clock-names = "apb_pclk"; }; + spc@7fff0000 { + compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"; + reg = <0 0x7fff0000 0 0x1000>; + interrupts = <0 95 4>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <1 13 0xf08>, @@ -133,12 +263,21 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; + cluster = <&cluster0>; interrupts = <0 68 4>, <0 69 4>; }; + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + cluster = <&cluster1>; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + }; + oscclk6a: oscclk6a { /* Reference 24MHz clock */ compatible = "fixed-clock"; @@ -147,6 +286,15 @@ clock-output-names = "oscclk6a"; }; + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0x80100001>; + cpu_off = <0x80100002>; + cpu_on = <0x80100003>; + migrate = <0x80100004>; + }; + dcc { compatible = "arm,vexpress,config-bus"; arm,vexpress,config-bridge = <&v2m_sysreg>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts index c544a5504591..cf633ed6a1b4 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA5s"; arm,hbi = <0x225>; @@ -59,6 +61,8 @@ interrupts = <0 85 4>; clocks = <&oscclk3>; clock-names = "pxlclk"; + mode = "640x480-16@60"; + framebuffer = <0xbf000000 0x01000000>; }; memory-controller@2a150000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 62d9b225dcce..f83706bd3f9a 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -9,6 +9,8 @@ /dts-v1/; +/include/ "clcd-panels.dtsi" + / { model = "V2P-CA9"; arm,hbi = <0x191>; @@ -73,6 +75,8 @@ interrupts = <0 44 4>; clocks = <&oscclk1>, <&oscclk2>; clock-names = "clcdclk", "apb_pclk"; + mode = "XVGA"; + use_dma = <1>; }; memory-controller@100e0000 { diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 48434cbe3e89..462cd580fc2d 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -14,5 +14,9 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o + AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a +CFLAGS_REMOVE_mcpm_entry.o = -pg diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 000000000000..8fee70dfb302 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,864 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't sleep/schedule anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* + * Swap the physical CPUs in the logical map for this logical CPU. + * This must be flushed to RAM as the resume code + * needs to access it while the caches are still disabled. + */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + sync_cache_w(&cpu_logical_map(this_cpu)); + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct * bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. + */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + spin_lock(&t->lock); + if (t->completer) { + spin_unlock(&t->lock); + return -EBUSY; + } + t->completer = completer; + t->completer_cookie = completer_cookie; + t->wanted_cluster = new_cluster_id; + spin_unlock(&t->lock); + wake_up(&t->wq); + return 0; +} + +EXPORT_SYMBOL_GPL(bL_switch_request_cb); + +/* + * Detach an outstanding switch request. + * + * The switcher will continue with the switch request in the background, + * but the completer function will not be called. + * + * This may be necessary if the completer is in a kernel module which is + * about to be unloaded. + */ +void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task) || !t->task) + return; + + spin_lock(&t->lock); + if (t->completer == completer) + t->completer = NULL; + spin_unlock(&t->lock); +} + +EXPORT_SYMBOL_GPL(bL_switch_request_detach); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int bL_switcher_halve_cpus(void) +{ + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + + /* First pass to validate what we have */ + mask = 0; + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); + } + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); + return -EINVAL; + } + + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); + /* + * Let's remember the last match to create "odd" + * pairing on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. + */ + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +/* Determine the logical CPU a given physical CPU is grouped on. */ +int bL_switcher_get_logical_index(u32 mpidr) +{ + int cpu; + + if (!bL_switcher_active) + return -EUNATCH; + + mpidr &= MPIDR_HWID_BITMASK; + for_each_online_cpu(cpu) { + int pairing = bL_switcher_cpu_pairing[cpu]; + if (pairing == -1) + continue; + if ((mpidr == cpu_logical_map(cpu)) || + (mpidr == cpu_logical_map(pairing))) + return cpu; + } + return -EINVAL; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr()); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + if (bL_switcher_active) { + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + spin_lock_init(&t->lock); + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warning("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation while the switcher is active. + * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (bL_switcher_active) + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static struct notifier_block bL_switcher_hotplug_notifier = + { &bL_switcher_hotplug_callback, NULL, 0 }; + +#ifdef CONFIG_SCHED_HMP +static bool no_bL_switcher = true; +#else +static bool no_bL_switcher; +#endif +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + register_cpu_notifier(&bL_switcher_hotplug_notifier); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 000000000000..5e2dd197e728 --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '4' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a03..4a2b32fd53a1 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __cpuc_flush_dcache_area((void *)poke, 8); + outer_clean_range(__pa(poke), __pa(poke + 2)); +} + static const struct mcpm_platform_ops *platform_ops; int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705c4b24..0decb3c07165 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -15,6 +15,7 @@ #include <linux/linkage.h> #include <asm/mcpm.h> +#include <asm/assembler.h> #include "vlock.h" @@ -47,6 +48,7 @@ ENTRY(mcpm_entry_point) + ARM_BE8(setend be) THUMB( adr r12, BSYM(1f) ) THUMB( bx r12 ) THUMB( .thumb ) @@ -71,12 +73,19 @@ ENTRY(mcpm_entry_point) * position independent way. */ adr r5, 3f - ldmia r5, {r6, r7, r8, r11} + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes add r6, r5, r6 @ r6 = mcpm_entry_vectors ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys add r8, r5, r8 @ r8 = mcpm_sync add r11, r5, r11 @ r11 = first_man_locks + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + mov r0, #MCPM_SYNC_CLUSTER_SIZE mla r8, r0, r10, r8 @ r8 = sync cluster base @@ -195,7 +204,8 @@ mcpm_entry_gated: .align 2 -3: .word mcpm_entry_vectors - . +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b .word mcpm_power_up_setup_phys - 3b .word mcpm_sync - 3b .word first_man_locks - 3b @@ -214,6 +224,10 @@ first_man_locks: ENTRY(mcpm_entry_vectors) .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_power_up_setup_phys, #object ENTRY(mcpm_power_up_setup_phys) .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index accefe099182..a60052b24916 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -89,17 +89,43 @@ static inline u64 arch_counter_get_cntvct(void) return cval; } -static inline void __cpuinit arch_counter_set_user_access(void) +static inline u32 arch_timer_get_cntkctl(void) { u32 cntkctl; - asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl)); + return cntkctl; +} - /* disable user access to everything */ - cntkctl &= ~((3 << 8) | (7 << 0)); - +static inline void arch_timer_set_cntkctl(u32 cntkctl) +{ asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl)); } + +static inline void __cpuinit arch_counter_set_user_access(void) +{ + u32 cntkctl = arch_timer_get_cntkctl(); + + /* Disable user access to both physical/virtual counters/timers */ + /* Also disable virtual event stream */ + cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN + | ARCH_TIMER_USR_VT_ACCESS_EN + | ARCH_TIMER_VIRT_EVT_EN + | ARCH_TIMER_USR_VCT_ACCESS_EN + | ARCH_TIMER_USR_PCT_ACCESS_EN); + arch_timer_set_cntkctl(cntkctl); +} + +static inline void arch_timer_evtstrm_enable(int divider) +{ + u32 cntkctl = arch_timer_get_cntkctl(); + cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; + /* Set the divider and enable virtual event stream */ + cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) + | ARCH_TIMER_VIRT_EVT_EN; + arch_timer_set_cntkctl(cntkctl); + elf_hwcap |= HWCAP_EVTSTRM; +} + #endif #endif diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 05ee9eebad6b..e780afbcee54 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -53,6 +53,13 @@ #define put_byte_3 lsl #0 #endif +/* Select code for any configuration running in BE8 mode */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define ARM_BE8(code...) code +#else +#define ARM_BE8(code...) +#endif + /* * Data preload for architectures that support it */ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index da1c77d39327..6447a0b7b127 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -301,8 +301,8 @@ static inline void atomic64_add(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_add\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -320,8 +320,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_add_return\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -341,8 +341,8 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_sub\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -360,8 +360,8 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_sub_return\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -428,9 +428,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v) __asm__ __volatile__("@ atomic64_dec_if_positive\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, #1\n" -" sbc %H0, %H0, #0\n" -" teq %H0, #0\n" +" subs %Q0, %Q0, #1\n" +" sbc %R0, %R0, #0\n" +" teq %R0, #0\n" " bmi 2f\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" @@ -459,8 +459,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) " teqeq %H0, %H5\n" " moveq %1, #0\n" " beq 2f\n" -" adds %0, %0, %6\n" -" adc %H0, %H0, %H6\n" +" adds %Q0, %Q0, %Q6\n" +" adc %R0, %R0, %R6\n" " strexd %2, %0, %H0, [%4]\n" " teq %2, #0\n" " bne 1b\n" diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 000000000000..482383b45c91 --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,83 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/compiler.h> +#include <linux/types.h> + +typedef void (*bL_switch_completion_handler)(void *cookie); + +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie); +static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL); +} + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +#ifdef CONFIG_BL_SWITCHER + +void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer); + +int bL_switcher_register_notifier(struct notifier_block *nb); +int bL_switcher_unregister_notifier(struct notifier_block *nb); + +/* + * Use these functions to temporarily prevent enabling/disabling of + * the switcher. + * bL_switcher_get_enabled() returns true if the switcher is currently + * enabled. Each call to bL_switcher_get_enabled() must be followed + * by a call to bL_switcher_put_enabled(). These functions are not + * recursive. + */ +bool bL_switcher_get_enabled(void); +void bL_switcher_put_enabled(void); + +int bL_switcher_trace_trigger(void); +int bL_switcher_get_logical_index(u32 mpidr); + +#else +static void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer) { } + +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } +#endif /* CONFIG_BL_SWITCHER */ + +#endif diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 7af5c6c3653a..b274bde24905 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -2,6 +2,8 @@ #define _ASMARM_BUG_H #include <linux/linkage.h> +#include <linux/types.h> +#include <asm/opcodes.h> #ifdef CONFIG_BUG @@ -12,10 +14,10 @@ */ #ifdef CONFIG_THUMB2_KERNEL #define BUG_INSTR_VALUE 0xde02 -#define BUG_INSTR_TYPE ".hword " +#define BUG_INSTR(__value) __inst_thumb16(__value) #else #define BUG_INSTR_VALUE 0xe7f001f2 -#define BUG_INSTR_TYPE ".word " +#define BUG_INSTR(__value) __inst_arm(__value) #endif @@ -33,7 +35,7 @@ #define __BUG(__file, __line, __value) \ do { \ - asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \ + asm volatile("1:\t" BUG_INSTR(__value) "\n" \ ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ @@ -48,7 +50,7 @@ do { \ #define __BUG(__file, __line, __value) \ do { \ - asm volatile(BUG_INSTR_TYPE #__value); \ + asm volatile(BUG_INSTR(__value) "\n"); \ unreachable(); \ } while (0) #endif /* CONFIG_DEBUG_BUGVERBOSE */ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index a25e62d2de6e..2059f019bef4 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -437,4 +437,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) +/* + * Disabling cache access for one CPU in an ARMv7 SMP system is tricky. + * To do so we must: + * + * - Clear the SCTLR.C bit to prevent further cache allocations + * - Flush the desired level of cache + * - Clear the ACTLR "SMP" bit to disable local coherency + * + * ... and so without any intervening memory access in between those steps, + * not even to the stack. + * + * WARNING -- After this has been called: + * + * - No ldrex/strex (and similar) instructions must be used. + * - The CPU is obviously no longer coherent with the other CPUs. + * - This is unlikely to work as expected if Linux is running non-secure. + * + * Note: + * + * - This is known to apply to several ARMv7 processor implementations, + * however some exceptions may exist. Caveat emptor. + * + * - The clobber list is dictated by the call to v7_flush_dcache_*. + * fp is preserved to the stack explicitly prior disabling the cache + * since adding it to the clobber list is incompatible with having + * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering + * trampoline are inserted by the linker and to keep sp 64-bit aligned. + */ +#define v7_exit_coherency_flush(level) \ + asm volatile( \ + "stmfd sp!, {fp, ip} \n\t" \ + "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ + "bic r0, r0, #"__stringify(CR_C)" \n\t" \ + "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ + "isb \n\t" \ + "bl v7_flush_dcache_"__stringify(level)" \n\t" \ + "clrex \n\t" \ + "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \ + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ + "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ + "isb \n\t" \ + "dsb \n\t" \ + "ldmfd sp!, {fp, ip}" \ + : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r9","r10","lr","memory" ) + #endif diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 1f3262e99d81..cedd3721318b 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -61,6 +61,20 @@ static inline void set_cr(unsigned int val) isb(); } +static inline unsigned int get_auxcr(void) +{ + unsigned int val; + asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val)); + return val; +} + +static inline void set_auxcr(unsigned int val) +{ + asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" + : : "r" (val)); + isb(); +} + #ifndef CONFIG_SMP extern void adjust_cr(unsigned long mask, unsigned long set); #endif diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index 3ed37b4d93da..4f8e9e5514b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -2,10 +2,9 @@ #define ASMARM_DMA_CONTIGUOUS_H #ifdef __KERNEL__ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA #include <linux/types.h> -#include <asm-generic/dma-contiguous.h> void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 56211f2084ef..f4b46d39b9cf 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fp elf_fpregset_t; -#define EM_ARM 40 - #define EF_ARM_EABI_MASK 0xff000000 #define EF_ARM_EABI_UNKNOWN 0x00000000 #define EF_ARM_EABI_VER1 0x01000000 diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index f89515adac60..eb577f4f5f70 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level) #endif -#define HAVE_ARCH_CALLER_ADDR - -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)return_address(1)) -#define CALLER_ADDR2 ((unsigned long)return_address(2)) -#define CALLER_ADDR3 ((unsigned long)return_address(3)) -#define CALLER_ADDR4 ((unsigned long)return_address(4)) -#define CALLER_ADDR5 ((unsigned long)return_address(5)) -#define CALLER_ADDR6 ((unsigned long)return_address(6)) +#define ftrace_return_addr(n) return_address(n) #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 2740c2a2df63..3d7351c844aa 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 0cf7a6b842ff..ad774f37c47c 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -24,8 +24,8 @@ #define TRACER_TIMEOUT 10000 #define etm_writel(t, v, x) \ - (__raw_writel((v), (t)->etm_regs + (x))) -#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x))) + (writel_relaxed((v), (t)->etm_regs + (x))) +#define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x))) /* CoreSight Management Registers */ #define CSMR_LOCKACCESS 0xfb0 @@ -142,8 +142,8 @@ #define ETBFF_TRIGFL BIT(10) #define etb_writel(t, v, x) \ - (__raw_writel((v), (t)->etb_regs + (x))) -#define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x))) + (writel_relaxed((v), (t)->etb_regs + (x))) +#define etb_readl(t, x) (readl_relaxed((t)->etb_regs + (x))) #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etm_unlock(t) \ diff --git a/arch/arm/include/asm/hardware/debug-pl01x.S b/arch/arm/include/asm/hardware/debug-pl01x.S index f9fd083eff63..6489d1ffe3c8 100644 --- a/arch/arm/include/asm/hardware/debug-pl01x.S +++ b/arch/arm/include/asm/hardware/debug-pl01x.S @@ -18,12 +18,14 @@ .macro waituart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_TXFF bne 1001b .endm .macro busyuart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_BUSY bne 1001b .endm diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h index 48066ce9ea34..0a9d5dd93294 100644 --- a/arch/arm/include/asm/kgdb.h +++ b/arch/arm/include/asm/kgdb.h @@ -11,6 +11,7 @@ #define __ARM_KGDB_H__ #include <linux/ptrace.h> +#include <asm/opcodes.h> /* * GDB assumes that we're a user process being debugged, so @@ -41,7 +42,7 @@ static inline void arch_kgdb_breakpoint(void) { - asm(".word 0xe7ffdeff"); + asm(__inst_arm(0xe7ffdeff)); } extern void kgdb_handle_bus_error(void); diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 308ad7d6f98b..75bf07910b81 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#include <linux/types.h> + #ifndef __ASSEMBLY__ struct tag; @@ -16,8 +18,10 @@ struct pt_regs; struct smp_operations; #ifdef CONFIG_SMP #define smp_ops(ops) (&(ops)) +#define smp_init_ops(ops) (&(ops)) #else #define smp_ops(ops) (struct smp_operations *)NULL +#define smp_init_ops(ops) (bool (*)(void))NULL #endif struct machine_desc { @@ -41,6 +45,7 @@ struct machine_desc { unsigned char reserve_lp2 :1; /* never has lp2 */ char restart_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ + bool (*smp_init)(void); void (*fixup)(struct tag *, char **, struct meminfo *); void (*reserve)(void);/* reserve mem blocks */ diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a5..7626a7fd4938 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); /* + * This sets an early poke i.e a value to be poked into some address + * from very early assembly code before the CPU is ungated. The + * address must be physical, and if 0 then nothing will happen. + */ +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val); + +/* * CPU/cluster power operations API for higher subsystems to use. */ diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 6f18da09668b..64fd15159b7d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -16,7 +16,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID #define ASID_BITS 8 #define ASID_MASK ((~0ULL) << ASID_BITS) -#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) +#define ASID(mm) ((unsigned int)((mm)->context.id.counter & ~ASID_MASK)) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index f24edad26c70..0cd7824ca762 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -62,9 +62,19 @@ struct pmu_hw_events { raw_spinlock_t pmu_lock; }; +struct cpupmu_regs { + u32 pmc; + u32 pmcntenset; + u32 pmuseren; + u32 pmintenset; + u32 pmxevttype[8]; + u32 pmxevtcnt[8]; +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t valid_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); @@ -81,6 +91,8 @@ struct arm_pmu { int (*request_irq)(struct arm_pmu *, irq_handler_t handler); void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); + void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *); + void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *); int num_events; atomic_t active_events; struct mutex reserve_mutex; diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index ce0dbe7c1625..f0a8627c9f1c 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,6 +16,10 @@ #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0 +#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2 +#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3 struct psci_power_state { u16 id; @@ -32,5 +36,22 @@ struct psci_operations { }; extern struct psci_operations psci_ops; +extern struct smp_operations psci_smp_ops; +#ifdef CONFIG_ARM_PSCI +void psci_init(void); +bool psci_smp_available(void); +#else +static inline void psci_init(void) { } +static inline bool psci_smp_available(void) { return false; } +#endif + +#ifdef CONFIG_ARM_PSCI +extern int __init psci_probe(void); +#else +static inline int psci_probe(void) +{ + return -ENODEV; +} +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22bebe6ce..610ccf33f5e7 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern int register_ipi_completion(struct completion *completion, int cpu); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 58b8b84adcd2..983fa7c153a2 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask); + +#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + \ + .flags = 0*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 1*SD_WAKE_AFFINE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ +} +#endif +#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */ #else static inline void init_cpu_topology(void) { } static inline void store_cpu_topology(unsigned int cpuid) { } +static inline int cluster_to_logical_mask(unsigned int socket_id, + cpumask_t *cluster_mask) { return -EINVAL; } #endif diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 3688fd15a32d..7dcc10d67253 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -25,6 +25,7 @@ #define HWCAP_IDIVT (1 << 18) #define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */ #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) - +#define HWCAP_LPAE (1 << 20) +#define HWCAP_EVTSTRM (1 << 21) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..aa775438388c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,7 +17,8 @@ CFLAGS_REMOVE_return_address.o = -pg obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ process.o ptrace.o return_address.o sched_clock.o \ - setup.o signal.o stacktrace.o sys_arm.o time.o traps.o + setup.o signal.o sigreturn_codes.o \ + stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o @@ -82,6 +83,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o -obj-$(CONFIG_ARM_PSCI) += psci.o +ifeq ($(CONFIG_ARM_PSCI),y) +obj-y += psci.o +obj-$(CONFIG_SMP) += psci_smp.o +endif extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 32640ae7750f..45a68d6bb2a3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -416,9 +416,8 @@ __und_usr: bne __und_usr_thumb sub r4, r2, #4 @ ARM instr at LR - 4 1: ldrt r0, [r4] -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r0, r0 @ little endian instruction -#endif + ARM_BE8(rev r0, r0) @ little endian instruction + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index bc5bc0a97131..8c79344552d5 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -379,9 +379,7 @@ ENTRY(vector_swi) #else ldr r10, [lr, #-4] @ get SWI instruction #endif -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r10, r10 @ little endian instruction -#endif + ARM_BE8(rev r10, r10) @ little endian instruction #elif defined(CONFIG_AEABI) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 8bac553fe213..11284e744c80 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -77,6 +77,7 @@ __HEAD ENTRY(stext) + ARM_BE8(setend be ) @ ensure we are in BE8 mode THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, @@ -342,7 +343,6 @@ __turn_mmu_on_loc: .long __turn_mmu_on_end #if defined(CONFIG_SMP) - __CPUINIT ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. @@ -351,6 +351,9 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. */ + + ARM_BE8(setend be) @ ensure we are in BE8 mode + #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install_secondary #endif @@ -584,8 +587,10 @@ __fixup_a_pv_table: b 2f 1: add r7, r3 ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) and ip, 0x8f00 orr ip, r6 @ mask in offset bits 31-24 +ARM_BE8(rev16 ip, ip) strh ip, [r7, #2] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot @@ -594,8 +599,14 @@ __fixup_a_pv_table: #else b 2f 1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + orr ip, ip, r6, lsl#24 +#else bic ip, ip, #0x000000ff orr ip, ip, r6 @ mask in offset bits 31-24 +#endif str ip, [r7, r3] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 1fd749ee4a1b..1b803117ed91 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = { static void __init pm_init(void) { - cpu_pm_register_notifier(&dbg_cpu_pm_nb); + if (has_ossr) + cpu_pm_register_notifier(&dbg_cpu_pm_nb); } #else static inline void pm_init(void) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1e9be5d25e56..7e137873083d 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -24,6 +24,7 @@ #include <asm/sections.h> #include <asm/smp_plat.h> #include <asm/unwind.h> +#include <asm/opcodes.h> #ifdef CONFIG_XIP_KERNEL /* @@ -60,6 +61,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, Elf32_Sym *sym; const char *symname; s32 offset; + u32 tmp; #ifdef CONFIG_THUMB2_KERNEL u32 upper, lower, sign, j1, j2; #endif @@ -95,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: - offset = (*(u32 *)loc & 0x00ffffff) << 2; + offset = __mem_to_opcode_arm(*(u32 *)loc); + offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; @@ -111,9 +114,10 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, } offset >>= 2; + offset &= 0x00ffffff; - *(u32 *)loc &= 0xff000000; - *(u32 *)loc |= offset & 0x00ffffff; + *(u32 *)loc &= __opcode_to_mem_arm(0xff000000); + *(u32 *)loc |= __opcode_to_mem_arm(offset); break; case R_ARM_V4BX: @@ -121,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, * other bits to re-code instruction as * MOV PC,Rm. */ - *(u32 *)loc &= 0xf000000f; - *(u32 *)loc |= 0x01a0f000; + *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f); + *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000); break; case R_ARM_PREL31: @@ -132,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: - offset = *(u32 *)loc; + offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; @@ -140,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) offset >>= 16; - *(u32 *)loc &= 0xfff0f000; - *(u32 *)loc |= ((offset & 0xf000) << 4) | - (offset & 0x0fff); + tmp &= 0xfff0f000; + tmp |= ((offset & 0xf000) << 4) | + (offset & 0x0fff); + + *(u32 *)loc = __opcode_to_mem_arm(tmp); break; #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * 25 bit signed address range (Thumb-2 BL and B.W @@ -198,17 +204,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, sign = (offset >> 24) & 1; j1 = sign ^ (~(offset >> 23) & 1); j2 = sign ^ (~(offset >> 22) & 1); - *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) | + upper = (u16)((upper & 0xf800) | (sign << 10) | ((offset >> 12) & 0x03ff)); - *(u16 *)(loc + 2) = (u16)((lower & 0xd000) | - (j1 << 13) | (j2 << 11) | - ((offset >> 1) & 0x07ff)); + lower = (u16)((lower & 0xd000) | + (j1 << 13) | (j2 << 11) | + ((offset >> 1) & 0x07ff)); + + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * MOVT/MOVW instructions encoding in Thumb-2: @@ -229,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) offset >>= 16; - *(u16 *)loc = (u16)((upper & 0xfbf0) | - ((offset & 0xf000) >> 12) | - ((offset & 0x0800) >> 1)); - *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) | - ((offset & 0x0700) << 4) | - (offset & 0x00ff)); + upper = (u16)((upper & 0xfbf0) | + ((offset & 0xf000) >> 12) | + ((offset & 0x0800) >> 1)); + lower = (u16)((lower & 0x8f00) | + ((offset & 0x0700) << 4) | + (offset & 0x00ff)); + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; #endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ace0ce8f6641..b41749fe56dc 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -12,6 +12,7 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event, return armpmu_map_cache_event(cache_map, config); case PERF_TYPE_RAW: return armpmu_map_raw_event(raw_event_mask, config); + default: + if (event->attr.type >= PERF_TYPE_MAX) + return armpmu_map_raw_event(raw_event_mask, config); } return -ENOENT; @@ -163,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). @@ -179,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -206,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); @@ -222,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return 0; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -431,6 +446,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1f2740e3dbc0..0b48a38e3cf4 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -19,6 +19,7 @@ #define pr_fmt(fmt) "CPU PMU: " fmt #include <linux/bitmap.h> +#include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/of.h> @@ -31,33 +32,36 @@ #include <asm/pmu.h> /* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; +static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu); static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); + /* * Despite the names, these two functions are CPU-specific and are used * by the OProfile/perf code. */ const char *perf_pmu_name(void) { - if (!cpu_pmu) + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); + if (!pmu) return NULL; - return cpu_pmu->name; + return pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { - int max_events = 0; + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; + if (!pmu) + return 0; - return max_events; + return pmu->num_events; } EXPORT_SYMBOL_GPL(perf_num_counters); @@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; irqs = min(pmu_device->num_resources, num_possible_cpus()); for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) @@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; if (!pmu_device) return -ENODEV; @@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) for (i = 0; i < irqs; ++i) { err = 0; + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; @@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", irq, i); continue; @@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return err; } - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); } return 0; @@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) static void cpu_pmu_init(struct arm_pmu *cpu_pmu) { int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) { struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); events->events = per_cpu(hw_events, cpu); events->used_mask = per_cpu(used_mask, cpu); @@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1); } /* @@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) static int __cpuinit cpu_pmu_notify(struct notifier_block *b, unsigned long action, void *hcpu) { + struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu); + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) return NOTIFY_DONE; - if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(cpu_pmu); + if (pmu && pmu->reset) + pmu->reset(pmu); else return NOTIFY_DONE; return NOTIFY_OK; } +static int cpu_pmu_pm_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + int cpu = smp_processor_id(); + struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu); + struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu); + + if (!pmu) + return NOTIFY_DONE; + + if (action == CPU_PM_ENTER && pmu->save_regs) { + pmu->save_regs(pmu, pmuregs); + } else if (action == CPU_PM_EXIT && pmu->restore_regs) { + pmu->restore_regs(pmu, pmuregs); + } + + return NOTIFY_OK; +} + static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = { .notifier_call = cpu_pmu_notify, }; +static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = { + .notifier_call = cpu_pmu_pm_notify, +}; + /* * PMU platform driver and devicetree bindings. */ @@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu) } } + /* assume PMU support all the CPUs in this case */ + cpumask_setall(&pmu->valid_cpus); + put_cpu(); return ret; } @@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!"); - return -ENOSPC; - } + int ret = 0; + int cpu; pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); if (!pmu) { @@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); + smp_call_func_t init_fn = (smp_call_func_t)of_id->data; + struct device_node *ncluster; + int cluster = -1; + cpumask_t sibling_mask; + + ncluster = of_parse_phandle(node, "cluster", 0); + if (ncluster) { + int len; + const u32 *hwid; + hwid = of_get_property(ncluster, "reg", &len); + if (hwid && len == 4) + cluster = be32_to_cpup(hwid); + } + /* set sibling mask to all cpu mask if socket is not specified */ + if (cluster == -1 || + cluster_to_logical_mask(cluster, &sibling_mask)) + cpumask_setall(&sibling_mask); + + smp_call_function_any(&sibling_mask, init_fn, pmu, 1); + + /* now set the valid_cpus after init */ + cpumask_copy(&pmu->valid_cpus, &sibling_mask); } else { ret = probe_current_pmu(pmu); } @@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) goto out_free; } - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - cpu_pmu_init(cpu_pmu); - ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); + for_each_cpu_mask(cpu, pmu->valid_cpus) + per_cpu(cpu_pmu, cpu) = pmu; + + pmu->plat_device = pdev; + cpu_pmu_init(pmu); + ret = armpmu_register(pmu, -1); if (!ret) return 0; @@ -313,9 +366,17 @@ static int __init register_pmu_driver(void) if (err) return err; + err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier); + if (err) { + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + return err; + } + err = platform_driver_register(&cpu_pmu_driver); - if (err) + if (err) { + cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier); unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + } return err; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 039cffb053a7..654db5030c31 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) } #endif +static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc)); + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset)); + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren)); + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset)); + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 1" + : "=r"(regs->pmxevttype[cnt])); + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r"(regs->pmxevtcnt[cnt])); + } + return; +} + +static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset)); + asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren)); + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset)); + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mcr p15, 0, %0, c9, c13, 1" + : : "r"(regs->pmxevttype[cnt])); + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r"(regs->pmxevtcnt[cnt])); + } + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc)); +} + static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; @@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->save_regs = armv7pmu_save_regs; + cpu_pmu->restore_regs = armv7pmu_restore_regs; cpu_pmu->max_period = (1LLU << 32) - 1; }; @@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void) static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->name = "ARMv7_Cortex_A8"; cpu_pmu->map_event = armv7_a8_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->name = "ARMv7_Cortex_A9"; cpu_pmu->map_event = armv7_a9_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->name = "ARMv7_Cortex_A5"; cpu_pmu->map_event = armv7_a5_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->name = "ARMv7_Cortex_A15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->name = "ARMv7_Cortex_A7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 36531643cc2c..0daf4f252284 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/compiler.h> #include <asm/errno.h> @@ -26,6 +27,11 @@ struct psci_operations psci_ops; +/* Type of psci support. Currently can only be enabled or disabled */ +#define PSCI_SUP_DISABLED 0 +#define PSCI_SUP_ENABLED 1 + +static unsigned int psci; static int (*invoke_psci_fn)(u32, u32, u32, u32); enum psci_function { @@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_EOPNOTSUPP -1 #define PSCI_RET_EINVAL -2 #define PSCI_RET_EPERM -3 +#define PSCI_RET_EALREADYON -4 static int psci_to_linux_errno(int errno) { @@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno) return -EINVAL; case PSCI_RET_EPERM: return -EPERM; + case PSCI_RET_EALREADYON: + return -EAGAIN; }; return -EINVAL; @@ -158,15 +167,18 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -static int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; u32 id; + if (psci == PSCI_SUP_DISABLED) + return; + np = of_find_matching_node(NULL, psci_of_match); if (!np) - return 0; + return; pr_info("probing function IDs from device-tree\n"); @@ -206,6 +218,35 @@ static int __init psci_init(void) out_put_node: of_node_put(np); - return 0; + return; +} + +int __init psci_probe(void) +{ + struct device_node *np; + int ret = -ENODEV; + + if (psci == PSCI_SUP_ENABLED) { + np = of_find_matching_node(NULL, psci_of_match); + if (np) + ret = 0; + } + + of_node_put(np); + return ret; +} + +static int __init early_psci(char *val) +{ + int ret = 0; + + if (strcmp(val, "enable") == 0) + psci = PSCI_SUP_ENABLED; + else if (strcmp(val, "disable") == 0) + psci = PSCI_SUP_DISABLED; + else + ret = -EINVAL; + + return ret; } -early_initcall(psci_init); +early_param("psci", early_psci); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c new file mode 100644 index 000000000000..23a11424c568 --- /dev/null +++ b/arch/arm/kernel/psci_smp.c @@ -0,0 +1,84 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/smp.h> +#include <linux/of.h> + +#include <asm/psci.h> +#include <asm/smp_plat.h> + +/* + * psci_smp assumes that the following is true about PSCI: + * + * cpu_suspend Suspend the execution on a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * cpu_off Power down a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * no return on successful call + * + * cpu_on Power up a CPU + * @cpuid cpuid of target CPU, as from MPIDR + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * migrate Migrate the context to a different CPU + * @cpuid cpuid of target CPU, as from MPIDR + * returns 0 success, < 0 on failure + * + */ + +extern void secondary_startup(void); + +static int __cpuinit psci_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +#ifdef CONFIG_HOTPLUG_CPU +void __ref psci_cpu_die(unsigned int cpu) +{ + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + if (psci_ops.cpu_off) + psci_ops.cpu_off(ps); + + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); +} +#else +#define psci_cpu_die NULL +#endif + +bool __init psci_smp_available(void) +{ + /* is cpu_on available at least? */ + return (psci_ops.cpu_on != NULL); +} + +struct smp_operations __initdata psci_smp_ops = { + .smp_boot_secondary = psci_boot_secondary, + .cpu_die = psci_cpu_die, +}; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index eb83bcc70ec8..29beb8c76560 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include <asm/cputype.h> #include <asm/elf.h> #include <asm/procinfo.h> +#include <asm/psci.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -261,6 +262,19 @@ static int cpu_has_aliasing_icache(unsigned int arch) int aliasing_icache; unsigned int id_reg, num_sets, line_size; +#ifdef CONFIG_BIG_LITTLE + /* + * We expect a combination of Cortex-A15 and Cortex-A7 cores. + * A7 = VIPT aliasing I-cache + * A15 = PIPT (non-aliasing) I-cache + * To cater for this discrepancy, let's assume aliasing I-cache + * all the time. This means unneeded extra work on the A15 but + * only ptrace is affected which is not performance critical. + */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0) + return 1; +#endif + /* PIPT caches never alias. */ if (icache_is_pipt()) return 0; @@ -818,9 +832,15 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); + psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - smp_set_ops(mdesc->smp); + if (!mdesc->smp_init || !mdesc->smp_init()) { + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); } #endif @@ -894,6 +914,9 @@ static const char *hwcap_str[] = { "vfpv4", "idiva", "idivt", + "vfpd32", + "lpae", + "evtstrm", NULL }; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 5a42c12767af..3c23086dc8e2 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -21,29 +21,7 @@ #include <asm/unistd.h> #include <asm/vfp.h> -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) - -/* - * With EABI, the syscall number has to be loaded into r7. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -/* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -static const unsigned long sigreturn_codes[7] = { - MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, -}; +extern const unsigned long sigreturn_codes[7]; static unsigned long signal_return_offset; diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S new file mode 100644 index 000000000000..3c5d0f2170fd --- /dev/null +++ b/arch/arm/kernel/sigreturn_codes.S @@ -0,0 +1,80 @@ +/* + * sigreturn_codes.S - code sinpets for sigreturn syscalls + * + * Created by: Victor Kamensky, 2013-08-13 + * Copyright: (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <asm/unistd.h> + +/* + * For ARM syscalls, we encode the syscall number into the instruction. + * With EABI, the syscall number has to be loaded into r7. As result + * ARM syscall sequence snippet will have move and svc in .arm encoding + * + * For Thumb syscalls, we pass the syscall number via r7. We therefore + * need two 16-bit instructions in .thumb encoding + * + * Please note sigreturn_codes code are not executed in place. Instead + * they just copied by kernel into appropriate places. Code inside of + * arch/arm/kernel/signal.c is very sensitive to layout of these code + * snippets. + */ + +#if __LINUX_ARM_ARCH__ <= 4 + /* + * Note we manually set minimally required arch that supports + * required thumb opcodes for early arch versions. It is OK + * for this file to be used in combination with other + * lower arch variants, since these code snippets are only + * used as input data. + */ + .arch armv4t +#endif + + .section .rodata + .global sigreturn_codes + .type sigreturn_codes, #object + + .arm + +sigreturn_codes: + + /* ARM sigreturn syscall code snippet */ + mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn syscall code snippet */ + .thumb + movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* ARM sigreturn_rt syscall code snippet */ + .arm + mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn_rt syscall code snippet */ + .thumb + movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* + * Note on addtional space: setup_return in signal.c + * algorithm uses two words copy regardless whether + * it is thumb case or not, so we need additional + * word after real last entry. + */ + .arm + .space 4 + + .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415c..baf4d28213a5 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -4,6 +4,7 @@ #include <asm/assembler.h> #include <asm/glue-cache.h> #include <asm/glue-proc.h> +#include "entry-header.S" .text /* @@ -30,9 +31,8 @@ ENTRY(__cpu_suspend) mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp #ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + get_thread_info r5 + ldr lr, [r5, #TI_CPU] @ cpu logical index add r3, r3, lr, lsl #2 #endif bl __cpu_suspend_save @@ -81,11 +81,15 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) +ARM_BE8(setend be) @ ensure we are in BE mode #ifdef CONFIG_SMP + mov r1, #0 @ fall-back logical index for UP + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + bic r0, #0xff000000 + bl cpu_logical_index @ return logical index in r1 +1: adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 ldr r0, [r0, r1, lsl #2] @ stack phys addr #else ldr r0, sleep_save_sp @ stack phys addr @@ -102,3 +106,20 @@ sleep_save_sp: .rept CONFIG_NR_CPUS .long 0 @ preserve stack phys ptr here .endr + +#ifdef CONFIG_SMP +cpu_logical_index: + adr r3, cpu_map_ptr + ldr r2, [r3] + add r3, r3, r2 @ virt_to_phys(__cpu_logical_map) + mov r1, #0 +1: + ldr r2, [r3, r1, lsl #2] + cmp r2, r0 + moveq pc, lr + add r1, r1, #1 + b 1b + +cpu_map_ptr: + .long __cpu_logical_map - . +#endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb9..dc2843f337af 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -46,6 +46,9 @@ #include <asm/virt.h> #include <asm/mach/arch.h> +#define CREATE_TRACE_POINTS +#include <trace/events/arm-ipi.h> + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -57,7 +60,7 @@ struct secondary_data secondary_data; * control for which core is the next to come out of the secondary * boot "holding pen" */ -volatile int __cpuinitdata pen_release = -1; +volatile int pen_release = -1; enum ipi_msg_type { IPI_WAKEUP, @@ -66,6 +69,7 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_COMPLETION, }; static DECLARE_COMPLETION(cpu_running); @@ -463,6 +467,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_COMPLETION, "completion interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -588,6 +593,19 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +static DEFINE_PER_CPU(struct completion *, cpu_completion); + +int register_ipi_completion(struct completion *completion, int cpu) +{ + per_cpu(cpu_completion, cpu) = completion; + return IPI_COMPLETION; +} + +static void ipi_complete(unsigned int cpu) +{ + complete(per_cpu(cpu_completion, cpu)); +} + /* * Main handler for inter-processor interrupts */ @@ -604,6 +622,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if (ipinr < NR_IPI) __inc_irq_stat(cpu, ipi_irqs[ipinr]); + trace_arm_ipi_entry(ipinr); switch (ipinr) { case IPI_WAKEUP: break; @@ -638,11 +657,18 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_COMPLETION: + irq_enter(); + ipi_complete(cpu); + irq_exit(); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; } + trace_arm_ipi_exit(ipinr); set_irq_regs(old_regs); } diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 5bc1a63284e3..1aafa0d785eb 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -28,7 +28,7 @@ */ unsigned int __init scu_get_core_count(void __iomem *scu_base) { - unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG); + unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG); return (ncores & 0x03) + 1; } @@ -42,19 +42,19 @@ void scu_enable(void __iomem *scu_base) #ifdef CONFIG_ARM_ERRATA_764369 /* Cortex-A9 only */ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { - scu_ctrl = __raw_readl(scu_base + 0x30); + scu_ctrl = readl_relaxed(scu_base + 0x30); if (!(scu_ctrl & 1)) - __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30); } #endif - scu_ctrl = __raw_readl(scu_base + SCU_CTRL); + scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) return; scu_ctrl |= 1; - __raw_writel(scu_ctrl, scu_base + SCU_CTRL); + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* * Ensure that the data accessed by CPU0 before the SCU was @@ -80,9 +80,9 @@ int scu_power_mode(void __iomem *scu_base, unsigned int mode) if (mode > 3 || mode == 1 || cpu > 3) return -EINVAL; - val = __raw_readb(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; + val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; val |= mode; - __raw_writeb(val, scu_base + SCU_CPU_STATUS + cpu); + writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu); return 0; } diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index f6fd1d4398c6..4971ccf012ca 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -45,7 +45,7 @@ static void twd_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_PERIODIC; - __raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), + writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), twd_base + TWD_TIMER_LOAD); break; case CLOCK_EVT_MODE_ONESHOT: @@ -58,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode, ctrl = 0; } - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); } static int twd_set_next_event(unsigned long evt, struct clock_event_device *unused) { - unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); + unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL); ctrl |= TWD_TIMER_CONTROL_ENABLE; - __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); return 0; } @@ -82,8 +82,8 @@ static int twd_set_next_event(unsigned long evt, */ static int twd_timer_ack(void) { - if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); + if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) { + writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); return 1; } @@ -209,15 +209,15 @@ static void __cpuinit twd_calibrate_rate(void) waitjiffies += 5; /* enable, no interrupt or reload */ - __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(twd_base + TWD_TIMER_COUNTER); + count = readl_relaxed(twd_base + TWD_TIMER_COUNTER); twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); @@ -275,7 +275,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) * bother with the below. */ if (per_cpu(percpu_setup_called, cpu)) { - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clockevents_register_device(*__this_cpu_ptr(twd_evt)); enable_percpu_irq(clk->irq, 0); return 0; @@ -288,7 +288,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) * The following is done once per CPU the first time .setup() is * called. */ - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index c5a59546a256..677da58d9e88 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <asm/cputype.h> +#include <asm/smp_plat.h> #include <asm/topology.h> /* @@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } + +#ifdef CONFIG_SCHED_HMP + +static const char * const little_cores[] = { + "arm,cortex-a7", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) + return true; + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 4) { + pr_err("* %s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. + */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct cpumask hmp_fast_cpu_mask; + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. + */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + + return -EINVAL; +} + /* * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d6a0fdb6c2ee..b4fd850c34b2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/unwind.h> #include <asm/tls.h> #include <asm/system_misc.h> +#include <asm/opcodes.h> static const char *handler[]= { "prefetch abort", @@ -347,15 +348,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs, int is_valid_bugaddr(unsigned long pc) { #ifdef CONFIG_THUMB2_KERNEL - unsigned short bkpt; + u16 bkpt; + u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); #else - unsigned long bkpt; + u32 bkpt; + u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif if (probe_kernel_address((unsigned *)pc, bkpt)) return 0; - return bkpt == BUG_INSTR_VALUE; + return bkpt == insn; } #endif @@ -408,25 +411,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (processor_mode(regs) == SVC_MODE) { #ifdef CONFIG_THUMB2_KERNEL if (thumb_mode(regs)) { - instr = ((u16 *)pc)[0]; + instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]); if (is_wide_instruction(instr)) { - instr <<= 16; - instr |= ((u16 *)pc)[1]; + u16 inst2; + inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]); + instr = __opcode_thumb32_compose(instr, inst2); } } else #endif - instr = *(u32 *) pc; + instr = __mem_to_opcode_arm(*(u32 *) pc); } else if (thumb_mode(regs)) { if (get_user(instr, (u16 __user *)pc)) goto die_sig; + instr = __mem_to_opcode_thumb16(instr); if (is_wide_instruction(instr)) { unsigned int instr2; if (get_user(instr2, (u16 __user *)pc+1)) goto die_sig; - instr <<= 16; - instr |= instr2; + instr2 = __mem_to_opcode_thumb16(instr2); + instr = __opcode_thumb32_compose(instr, instr2); } } else if (get_user(instr, (u32 __user *)pc)) { + instr = __mem_to_opcode_arm(instr); goto die_sig; } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ef1703b9587b..1d55afe7fd4b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -17,6 +17,7 @@ */ #include <linux/cpu.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -835,6 +836,33 @@ static struct notifier_block hyp_init_cpu_nb = { .notifier_call = hyp_init_cpu_notify, }; +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + if (cmd == CPU_PM_EXIT) { + cpu_init_hyp_mode(NULL); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +#endif + /** * Inits Hyp-mode on all online CPUs */ @@ -995,6 +1023,8 @@ int kvm_arch_init(void *opaque) goto out_err; } + hyp_cpu_pm_init(); + kvm_coproc_table_init(); return 0; out_err: diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c index 753b94f3fca7..d88234e14f96 100644 --- a/arch/arm/mach-exynos/mach-exynos5-dt.c +++ b/arch/arm/mach-exynos/mach-exynos5-dt.c @@ -14,6 +14,7 @@ #include <linux/memblock.h> #include <linux/io.h> #include <linux/clocksource.h> +#include <linux/dma-mapping.h> #include <asm/mach/arch.h> #include <mach/regs-pmu.h> @@ -23,11 +24,31 @@ #include "common.h" +static u64 dma_mask64 = DMA_BIT_MASK(64); + static void __init exynos5_dt_map_io(void) { exynos_init_io(NULL, 0); } +static int exynos5250_platform_notifier(struct notifier_block *nb, + unsigned long event, void *__dev) +{ + struct device *dev = __dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + dev->dma_mask = &dma_mask64; + dev->coherent_dma_mask = DMA_BIT_MASK(64); + + return NOTIFY_OK; +} + +static struct notifier_block exynos5250_platform_nb = { + .notifier_call = exynos5250_platform_notifier, +}; + static void __init exynos5_dt_machine_init(void) { struct device_node *i2c_np; @@ -52,6 +73,11 @@ static void __init exynos5_dt_machine_init(void) } } + if (config_enabled(CONFIG_ARM_LPAE) && + of_machine_is_compatible("samsung,exynos5250")) + bus_register_notifier(&platform_bus_type, + &exynos5250_platform_nb); + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index cd9fcb1cd7ab..b8466fb00f55 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -2,6 +2,7 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7 select ARCH_HAS_CPUFREQ select ARCH_HAS_OPP + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_GIC diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index 73a2d905af8a..72de05f09cb8 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -1,9 +1,5 @@ if ARCH_IXP4XX -config ARCH_SUPPORTS_BIG_ENDIAN - bool - default y - menu "Intel IXP4xx Implementation Options" comment "IXP4xx Platforms" diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 80a8bcacd9d5..317cdb800099 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -1,5 +1,6 @@ config ARCH_MVEBU bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7 + select ARCH_SUPPORTS_BIG_ENDIAN select CLKSRC_MMIO select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 5476669ba905..ee7598fe75db 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -20,6 +20,8 @@ #define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 #define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 +#include <asm/assembler.h> + .text /* * r0: Coherency fabric base register address @@ -29,6 +31,7 @@ ENTRY(ll_set_cpu_coherent) /* Create bit by cpu index */ mov r3, #(1 << 24) lsl r1, r3, r1 +ARM_BE8(rev r1, r1) /* Add CPU to SMP group - Atomic */ add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S index a06e0ede8c08..458ed3fb2626 100644 --- a/arch/arm/mach-mvebu/headsmp.S +++ b/arch/arm/mach-mvebu/headsmp.S @@ -21,6 +21,8 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> + /* * At this stage the secondary CPUs don't have acces yet to the MMU, so * we have to provide physical addresses @@ -35,6 +37,7 @@ * startup */ ENTRY(armada_xp_secondary_startup) + ARM_BE8(setend be ) @ go BE8 if entered LE /* Read CPU id */ mrc p15, 0, r1, c0, c0, 5 diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 5907e10c37fd..39858ba03084 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -1,6 +1,9 @@ config ARCH_VEXPRESS bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP select ARCH_REQUIRE_GPIOLIB + select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 @@ -56,5 +59,23 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA config ARCH_VEXPRESS_CA9X4 bool "Versatile Express Cortex-A9x4 tile" + select ARM_ERRATA_643719 + +config ARCH_VEXPRESS_DCSCB + bool "Dual Cluster System Control Block (DCSCB) support" + depends on MCPM + select ARM_CCI + help + Support for the Dual Cluster System Configuration Block (DCSCB). + This is needed to provide CPU and cluster power management + on RTSM implementing big.LITTLE. + +config ARCH_VEXPRESS_TC2 + bool "TC2 cluster management" + depends on MCPM + select VEXPRESS_SPC + select ARM_CCI + help + Support for CPU and cluster power management on TC2. endmenu diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 42703e8b4d3b..14193dc7e6e8 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o +obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o +CFLAGS_REMOVE_dcscb.o = -pg +obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o tc2_pm_setup.o +CFLAGS_REMOVE_tc2_pm.o = -pg +ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y) +obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o +CFLAGS_REMOVE_tc2_pm_psci.o = -pg +endif obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h index f134cd4a85f1..bde4374ab6d5 100644 --- a/arch/arm/mach-vexpress/core.h +++ b/arch/arm/mach-vexpress/core.h @@ -6,6 +6,8 @@ void vexpress_dt_smp_map_io(void); +bool vexpress_smp_init_ops(void); + extern struct smp_operations vexpress_smp_ops; extern void vexpress_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c new file mode 100644 index 000000000000..b35700f8e01f --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb.c @@ -0,0 +1,236 @@ +/* + * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block + * + * Created by: Nicolas Pitre, May 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/of_address.h> +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + + +#define RST_HOLD0 0x0 +#define RST_HOLD1 0x4 +#define SYS_SWRESET 0x8 +#define RST_STAT0 0xc +#define RST_STAT1 0x10 +#define EAG_CFG_R 0x20 +#define EAG_CFG_W 0x24 +#define KFC_CFG_R 0x28 +#define KFC_CFG_W 0x2c +#define DCS_CFG_R 0x30 + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() while its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. + */ +static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static void __iomem *dcscb_base; +static int dcscb_use_count[4][2]; +static int dcscb_allcpus_mask[2]; + +static int dcscb_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int rst_hold, cpumask = (1 << cpu); + unsigned int all_mask = dcscb_allcpus_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cpu >= 4 || cluster >= 2) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. + */ + local_irq_disable(); + arch_spin_lock(&dcscb_lock); + + dcscb_use_count[cpu][cluster]++; + if (dcscb_use_count[cpu][cluster] == 1) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + if (rst_hold & (1 << 8)) { + /* remove cluster reset and add individual CPU's reset */ + rst_hold &= ~(1 << 8); + rst_hold |= all_mask; + } + rst_hold &= ~(cpumask | (cpumask << 4)); + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&dcscb_lock); + local_irq_enable(); + + return 0; +} + +static void dcscb_power_down(void) +{ + unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpumask = (1 << cpu); + all_mask = dcscb_allcpus_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&dcscb_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + dcscb_use_count[cpu][cluster]--; + if (dcscb_use_count[cpu][cluster] == 0) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + rst_hold |= cpumask; + if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) { + rst_hold |= (1 << 8); + last_man = true; + } + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&dcscb_lock); + + /* Flush all cache levels for this cluster. */ + v7_exit_coherency_flush(all); + + /* + * This is a harmless no-op. On platforms with a real + * outer cache this might either be needed or not, + * depending on where the outer cache sits. + */ + outer_flush_all(); + + /* + * Disable cluster-level coherency by masking + * incoming snoops and DVM messages: + */ + cci_disable_port_by_cpu(mpidr); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + arch_spin_unlock(&dcscb_lock); + + /* Disable and flush the local CPU cache. */ + v7_exit_coherency_flush(louis); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + dsb(); + if (!skip_wfi) + wfi(); + + /* Not dead at this point? Let our caller cope. */ +} + +static const struct mcpm_platform_ops dcscb_power_ops = { + .power_up = dcscb_power_up, + .power_down = dcscb_power_down, +}; + +static void __init dcscb_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + dcscb_use_count[cpu][cluster] = 1; +} + +extern void dcscb_power_up_setup(unsigned int affinity_level); + +static int __init dcscb_init(void) +{ + struct device_node *node; + unsigned int cfg; + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + if (!cci_probed()) + return -ENODEV; + + node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb"); + if (!node) + return -ENODEV; + dcscb_base = of_iomap(node, 0); + if (!dcscb_base) + return -EADDRNOTAVAIL; + cfg = readl_relaxed(dcscb_base + DCS_CFG_R); + dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1; + dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1; + dcscb_usage_count_init(); + + ret = mcpm_platform_register(&dcscb_power_ops); + if (!ret) + ret = mcpm_sync_init(dcscb_power_up_setup); + if (ret) { + iounmap(dcscb_base); + return ret; + } + + pr_info("VExpress DCSCB support installed\n"); + + /* + * Future entries into the kernel can now go + * through the cluster entry vectors. + */ + vexpress_flags_set(virt_to_phys(mcpm_entry_point)); + + return 0; +} + +early_initcall(dcscb_init); diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S new file mode 100644 index 000000000000..4bb7fbe0f621 --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb_setup.S @@ -0,0 +1,38 @@ +/* + * arch/arm/include/asm/dcscb_setup.S + * + * Created by: Dave Martin, 2012-06-22 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + + +ENTRY(dcscb_power_up_setup) + + cmp r0, #0 @ check affinity level + beq 2f + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + * + * A15/A7 may not require explicit L2 invalidation on reset, dependent + * on hardware integration decisions. + * For now, this code assumes that L2 is either already invalidated, + * or invalidation is not required. + */ + + b cci_enable_port_for_self + +2: @ Implementation-specific local CPU setup operations should go here, + @ if any. In this case, there is nothing to do. + + bx lr + +ENDPROC(dcscb_power_up_setup) diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h new file mode 100644 index 000000000000..d3b5a2225a0e --- /dev/null +++ b/arch/arm/mach-vexpress/include/mach/tc2.h @@ -0,0 +1,10 @@ +#ifndef __MACH_TC2_H +#define __MACH_TC2_H + +/* + * cpu and cluster limits + */ +#define TC2_MAX_CPUS 3 +#define TC2_MAX_CLUSTERS 2 + +#endif diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index dc1ace55d557..993c9ae5dc5e 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -12,9 +12,11 @@ #include <linux/errno.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/vexpress.h> +#include <asm/mcpm.h> #include <asm/smp_scu.h> #include <asm/mach/map.h> @@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = { .cpu_die = vexpress_cpu_die, #endif }; + +bool __init vexpress_smp_init_ops(void) +{ +#ifdef CONFIG_MCPM + /* + * The best way to detect a multi-cluster configuration at the moment + * is to look for the presence of a CCI in the system. + * Override the default vexpress_smp_ops if so. + */ + struct device_node *node; + node = of_find_compatible_node(NULL, NULL, "arm,cci-400"); + if (node && of_device_is_available(node)) { + mcpm_smp_set_ops(); + return true; + } +#endif + return false; +} diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c new file mode 100644 index 000000000000..9fc264a3bade --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -0,0 +1,277 @@ +/* + * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support + * + * Created by: Nicolas Pitre, October 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Some portions of this file were originally written by Achin Gupta + * Copyright: (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/irqchip/arm-gic.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() after its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. + */ +static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster) +{ + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. + */ + local_irq_disable(); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) + vexpress_spc_powerdown_enable(cluster, 0); + + tc2_pm_use_count[cpu][cluster]++; + if (tc2_pm_use_count[cpu][cluster] == 1) { + vexpress_spc_write_resume_reg(cluster, cpu, + virt_to_phys(mcpm_entry_point)); + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + } else if (tc2_pm_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&tc2_pm_lock); + local_irq_enable(); + + return 0; +} + +static void tc2_pm_down(u64 residency) +{ + unsigned int mpidr, cpu, cluster; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&tc2_pm_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + tc2_pm_use_count[cpu][cluster]--; + if (tc2_pm_use_count[cpu][cluster] == 0) { + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster] && + (!residency || residency > 5000)) { + vexpress_spc_powerdown_enable(cluster, 1); + vexpress_spc_set_global_wakeup_intr(1); + last_man = true; + } + } else if (tc2_pm_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + /* + * If the CPU is committed to power down, make sure + * the power controller will be in charge of waking it + * up upon IRQ, ie IRQ lines are cut from GIC CPU IF + * to the CPU by disabling the GIC CPU IF to prevent wfi + * from completing execution behind power controller back + */ + if (!skip_wfi) + gic_cpu_if_down(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&tc2_pm_lock); + + if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + /* + * On the Cortex-A15 we need to disable + * L2 prefetching before flushing the cache. + */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3 \n\t" + "isb \n\t" + "dsb " + : : "r" (0x400) ); + } + + v7_exit_coherency_flush(all); + + cci_disable_port_by_cpu(mpidr); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + /* + * If last man then undo any setup done previously. + */ + if (last_man) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + arch_spin_unlock(&tc2_pm_lock); + + v7_exit_coherency_flush(louis); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + if (!skip_wfi) + wfi(); + + /* Not dead at this point? Let our caller cope. */ +} + +static void tc2_pm_power_down(void) +{ + tc2_pm_down(0); +} + +static void tc2_pm_suspend(u64 residency) +{ + extern void tc2_resume(void); + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + vexpress_spc_write_resume_reg(cluster, cpu, + virt_to_phys(tc2_resume)); + + tc2_pm_down(residency); +} + +static void tc2_pm_powered_up(void) +{ + unsigned int mpidr, cpu, cluster; + unsigned long flags; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + local_irq_save(flags); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + if (!tc2_pm_use_count[cpu][cluster]) + tc2_pm_use_count[cpu][cluster] = 1; + + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0); + vexpress_spc_write_resume_reg(cluster, cpu, 0); + + arch_spin_unlock(&tc2_pm_lock); + local_irq_restore(flags); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_power_up, + .power_down = tc2_pm_power_down, + .suspend = tc2_pm_suspend, + .powered_up = tc2_pm_powered_up, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + tc2_pm_use_count[cpu][cluster] = 1; +} + +extern void tc2_pm_power_up_setup(unsigned int affinity_level); + +static int __init tc2_pm_init(void) +{ + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + if (!vexpress_spc_check_loaded()) + return -ENODEV; + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(tc2_pm_power_up_setup); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c new file mode 100644 index 000000000000..c2fdc22e4c06 --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_psci.c @@ -0,0 +1,173 @@ +/* + * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support + * + * Created by: Achin Gupta, December 2012 + * Copyright: (C) 2012 ARM Limited + * + * Some portions of this file were originally written by Nicolas Pitre + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/psci.h> +#include <asm/atomic.h> +#include <asm/cputype.h> +#include <asm/cp15.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> + +/* + * Platform specific state id understood by the firmware and used to + * program the power controller + */ +#define PSCI_POWER_STATE_ID 0 + +static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int mpidr = (cluster << 8) | cpu; + int ret = 0; + + BUG_ON(!psci_ops.cpu_on); + + switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * This is a request to power up a cpu that linux thinks has + * been powered down. Retries are needed if the firmware has + * seen the power down request as yet. + */ + do + ret = psci_ops.cpu_on(mpidr, + virt_to_phys(mcpm_entry_point)); + while (ret == -EAGAIN); + + return ret; + case 2: + /* This power up request has overtaken a power down request */ + return ret; + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_power_down(void) +{ + struct psci_power_state power_state; + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + BUG_ON(!psci_ops.cpu_off); + + switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * Overtaken by a power up. Flush caches, exit coherency, + * return & fake a reset + */ + set_cr(get_cr() & ~CR_C); + + flush_cache_louis(); + + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + return; + case 0: + /* A normal request to possibly power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_off(power_state); + + /* On success this function never returns */ + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_suspend(u64 unused) +{ + struct psci_power_state power_state; + + BUG_ON(!psci_ops.cpu_suspend); + + /* On TC2 always attempt to power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point)); + + /* On success this function never returns */ + BUG(); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_psci_power_up, + .power_down = tc2_pm_psci_power_down, + .suspend = tc2_pm_psci_suspend, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + atomic_set(&tc2_pm_use_count[cpu][cluster], 1); +} + +static int __init tc2_pm_psci_init(void) +{ + int ret; + + ret = psci_probe(); + if (ret) { + pr_debug("psci not found. Aborting psci init\n"); + return -ENODEV; + } + + if (!vexpress_spc_check_loaded()) { + pr_debug("spc not found. Aborting psci init\n"); + return -ENODEV; + } + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(NULL); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_psci_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S new file mode 100644 index 000000000000..a18dafeeb0ee --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_setup.S @@ -0,0 +1,68 @@ +/* + * arch/arm/include/asm/tc2_pm_setup.S + * + * Created by: Nicolas Pitre, October 2012 + ( (based on dcscb_setup.S by Dave Martin) + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#include <linux/linkage.h> +#include <asm/mcpm.h> + + +#define SPC_PHYS_BASE 0x7FFF0000 +#define SPC_WAKE_INT_STAT 0xb2c + +#define SNOOP_CTL_A15 0x404 +#define SNOOP_CTL_A7 0x504 + +#define A15_SNOOP_MASK (0x3 << 7) +#define A7_SNOOP_MASK (0x1 << 13) + +#define A15_BX_ADDR0 0xB68 + + +ENTRY(tc2_resume) + mrc p15, 0, r0, c0, c0, 5 + ubfx r1, r0, #0, #4 @ r1 = cpu + ubfx r2, r0, #8, #4 @ r2 = cluster + add r1, r1, r2, lsl #2 @ r1 = index of CPU in WAKE_INT_STAT + ldr r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT + ldr r3, [r3] + lsr r3, r1 + tst r3, #1 + wfieq @ if no pending IRQ reenters wfi + b mcpm_entry_point +ENDPROC(tc2_resume) + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + */ + +ENTRY(tc2_pm_power_up_setup) + + cmp r0, #0 + beq 2f + + b cci_enable_port_for_self + +2: @ Clear the BX addr register + ldr r3, =SPC_PHYS_BASE + A15_BX_ADDR0 + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r1, r0, #8, #4 @ cluster + ubfx r0, r0, #0, #4 @ cpu + add r3, r3, r1, lsl #4 + mov r1, #0 + str r1, [r3, r0, lsl #2] + dsb + + bx lr + +ENDPROC(tc2_pm_power_up_setup) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 8802030df98d..057f99b62eaf 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/memblock.h> #include <linux/of_address.h> #include <linux/of_fdt.h> #include <linux/of_irq.h> @@ -373,6 +374,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express") .init_machine = v2m_init, MACHINE_END +static void __init v2m_dt_hdlcd_init(void) +{ + struct device_node *node; + int len, na, ns; + const __be32 *prop; + phys_addr_t fb_base, fb_size; + + node = of_find_compatible_node(NULL, NULL, "arm,hdlcd"); + if (!node) + return; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + if (WARN_ON(memblock_remove(fb_base, fb_size))) + return; +}; + static struct map_desc v2m_rs1_io_desc __initdata = { .virtual = V2M_PERIPH, .pfn = __phys_to_pfn(0x1c000000), @@ -423,6 +449,8 @@ void __init v2m_dt_init_early(void) pr_warning("vexpress: DT HBI (%x) is not matching " "hardware (%x)!\n", dt_hbi, hbi); } + + v2m_dt_hdlcd_init(); } static void __init v2m_dt_timer_init(void) @@ -456,6 +484,7 @@ static const char * const v2m_dt_match[] __initconst = { DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express") .dt_compat = v2m_dt_match, .smp = smp_ops(vexpress_smp_ops), + .smp_init = smp_init_ops(vexpress_smp_init_ops), .map_io = v2m_dt_map_io, .init_early = v2m_dt_init_early, .init_irq = irqchip_init, diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile index 042afc1f8c44..7ddbfa60227f 100644 --- a/arch/arm/mach-virt/Makefile +++ b/arch/arm/mach-virt/Makefile @@ -3,4 +3,3 @@ # obj-y := virt.o -obj-$(CONFIG_SMP) += platsmp.o diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c deleted file mode 100644 index f4143f5bfa5b..000000000000 --- a/arch/arm/mach-virt/platsmp.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Dummy Virtual Machine - does what it says on the tin. - * - * Copyright (C) 2012 ARM Ltd - * Author: Will Deacon <will.deacon@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/of.h> - -#include <asm/psci.h> -#include <asm/smp_plat.h> - -extern void secondary_startup(void); - -static void __init virt_smp_init_cpus(void) -{ -} - -static void __init virt_smp_prepare_cpus(unsigned int max_cpus) -{ -} - -static int __cpuinit virt_boot_secondary(unsigned int cpu, - struct task_struct *idle) -{ - if (psci_ops.cpu_on) - return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); - return -ENODEV; -} - -struct smp_operations __initdata virt_smp_ops = { - .smp_init_cpus = virt_smp_init_cpus, - .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_boot_secondary = virt_boot_secondary, -}; diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c index 061f283f579e..a67d2dd5bb60 100644 --- a/arch/arm/mach-virt/virt.c +++ b/arch/arm/mach-virt/virt.c @@ -36,11 +36,8 @@ static const char *virt_dt_match[] = { NULL }; -extern struct smp_operations virt_smp_ops; - DT_MACHINE_START(VIRT, "Dummy Virtual Machine") .init_irq = irqchip_init, .init_machine = virt_init, - .smp = smp_ops(virt_smp_ops), .dt_compat = virt_dt_match, MACHINE_END diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c21082d664ed..36e9f24e03b0 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -931,3 +931,9 @@ config ARCH_HAS_BARRIERS help This option allows the use of custom mandatory barriers included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN + bool + help + This option specifies the architecture can support big endian + operation. diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 80741992a9fc..3815a8262af0 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -38,9 +38,8 @@ ENTRY(v6_early_abort) bne do_DataAbort bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r3, r3 -#endif + ARM_BE8(rev r3, r3) + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b89078..924036473b16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -25,6 +25,7 @@ #include <asm/cp15.h> #include <asm/system_info.h> #include <asm/unaligned.h> +#include <asm/opcodes.h> #include "fault.h" @@ -762,21 +763,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); fault = probe_kernel_address(ptr, tinstr); + tinstr = __mem_to_opcode_thumb16(tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ u16 tinst2 = 0; fault = probe_kernel_address(ptr + 1, tinst2); - instr = (tinstr << 16) | tinst2; + tinst2 = __mem_to_opcode_thumb16(tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { isize = 2; instr = thumb2arm(tinstr); } } - } else + } else { fault = probe_kernel_address(instrptr, instr); + instr = __mem_to_opcode_arm(instr); + } if (fault) { type = TYPE_FAULT; diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 515b00064da8..a84e0536ce74 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -146,18 +146,18 @@ flush_levels: ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size loop1: - mov r9, r4 @ create working copy of max way size + mov r9, r7 @ create working copy of max index loop2: - ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r9, r5 ) + ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r4, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r7, r2 ) + ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r9, r2 ) THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way + subs r9, r9, #1 @ decrement the index bge loop2 - subs r7, r7, #1 @ decrement the index + subs r4, r4, #1 @ decrement the way bge loop1 skip: add r10, r10, #2 @ increment cache number diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6c9d7054d997..051e904a5379 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) if (!pages) goto no_pages; - if (IS_ENABLED(CONFIG_CMA)) + if (IS_ENABLED(CONFIG_DMA_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!IS_ENABLED(CONFIG_DMA_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f954f6..e207aa5f846f 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (pud_none(*pud_k)) goto bad_area; - if (!pud_present(*pud)) + if (!pud_present(*pud)) { set_pud(pud, *pud_k); + /* + * There is a small window during free_pgtables() where the + * user *pud entry is 0 but the TLB has not been invalidated + * and we get a level 2 (pmd) translation fault caused by the + * intermediate TLB caching of the old level 1 (pud) entry. + */ + flush_tlb_kernel_page(addr); + } pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); @@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, #endif if (pmd_none(pmd_k[index])) goto bad_area; + if (!pmd_present(pmd[index])) + copy_pmd(pmd, pmd_k); - copy_pmd(pmd, pmd_k); return 0; bad_area: diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 0ecc43fd6229..c12ae661d4ab 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index d07352819580..b96c6e64943e 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -219,9 +219,7 @@ __v6_setup: @ complete invalidations adr r5, v6_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 19da84172cc3..769496e6e8e9 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -352,9 +352,7 @@ __v7_setup: #endif adr r5, v7_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE orr r5, r5, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6de423dbd385..78351ca8d51e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -19,6 +19,7 @@ #include <linux/if_vlan.h> #include <asm/cacheflush.h> #include <asm/hwcap.h> +#include <asm/opcodes.h> #include "bpf_jit_32.h" @@ -113,8 +114,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor) static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { + inst |= (cond << 28); + inst = __opcode_to_mem_arm(inst); + if (ctx->target != NULL) - ctx->target[ctx->idx] = inst | (cond << 28); + ctx->target[ctx->idx] = inst; ctx->idx++; } diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index b178d44e9eaa..40f27e52de75 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -10,8 +10,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> - - __INIT +#include <asm/assembler.h> /* * Realview/Versatile Express specific entry point for secondary CPUs. @@ -19,6 +18,7 @@ * until we're ready for them to initialise. */ ENTRY(versatile_secondary_startup) + ARM_BE8(setend be) mrc p15, 0, r0, c0, c0, 5 bic r0, #0xff000000 adr r4, 1f diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..956445f55ead 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,31 +1,54 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_HAS_OPP + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select CPU_PM if (SUSPEND || CPU_IDLE) + select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_EARLY_IOREMAP select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE + select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA select NO_BOOTMEM @@ -61,11 +84,7 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config GENERIC_LOCKBREAK - def_bool y - depends on SMP && PREEMPT - -config RWSEM_GENERIC_SPINLOCK +config RWSEM_XCHGADD_ALGORITHM def_bool y config GENERIC_HWEIGHT @@ -77,7 +96,7 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y -config ZONE_DMA32 +config ZONE_DMA def_bool y config ARCH_DMA_ADDR_T_64BIT @@ -95,6 +114,9 @@ config SWIOTLB config IOMMU_HELPER def_bool SWIOTLB +config FIX_EARLYCON_MEM + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -111,6 +133,11 @@ config ARCH_VEXPRESS This enables support for the ARMv8 software model (Versatile Express). +config ARCH_XGENE + bool "AppliedMicro X-Gene SOC Family" + help + This enables support for AppliedMicro X-Gene SOC Family + endmenu menu "Bus support" @@ -130,6 +157,11 @@ config ARM64_64K_PAGES look-up. AArch32 emulation is not available when this feature is enabled. +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + help + Say Y if you plan on running a kernel in big-endian mode. + config SMP bool "Symmetric Multi-Processing" select USE_GENERIC_SMP_HELPERS @@ -144,11 +176,131 @@ config SMP If you don't know what to do here, say N. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. + +config DISABLE_CPU_SCHED_DOMAIN_BALANCE + bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing" + help + Disables scheduler load-balancing at CPU sched domain level. + +config SCHED_HMP + bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling" + depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP + help + Experimental scheduler optimizations for heterogeneous platforms. + Attempts to introspectively select task affinity to optimize power + and performance. Basic support for multiple (>2) cpu types is in place, + but it has only been tested with two types of cpus. + There is currently no support for migration of task groups, hence + !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled + between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE). + +config SCHED_HMP_PRIO_FILTER + bool "(EXPERIMENTAL) Filter HMP migrations by task priority" + depends on SCHED_HMP + help + Enables task priority based HMP migration filter. Any task with + a NICE value above the threshold will always be on low-power cpus + with less compute capacity. + +config SCHED_HMP_PRIO_FILTER_VAL + int "NICE priority threshold" + default 5 + depends on SCHED_HMP_PRIO_FILTER + +config HMP_FAST_CPU_MASK + string "HMP scheduler fast CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the fast CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_SLOW_CPU_MASK + string "HMP scheduler slow CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the slow CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_VARIABLE_SCALE + bool "Allows changing the load tracking scale through sysfs" + depends on SCHED_HMP + help + When turned on, this option exports the thresholds and load average + period value for the load tracking patches through sysfs. + The values can be modified to change the rate of load accumulation + and the thresholds used for HMP migration. + The load_avg_period_ms is the time in ms to reach a load average of + 0.5 for an idle task of 0 load average ratio that start a busy loop. + The up_threshold and down_threshold is the value to go to a faster + CPU or to go back to a slower cpu. + The {up,down}_threshold are devided by 1024 before being compared + to the load average. + For examples, with load_avg_period_ms = 128 and up_threshold = 512, + a running task with a load of 0 will be migrated to a bigger CPU after + 128ms, because after 128ms its load_avg_ratio is 0.5 and the real + up_threshold is 0.5. + This patch has the same behavior as changing the Y of the load + average computation to + (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms) + but it remove intermadiate overflows in computation. + +config HMP_FREQUENCY_INVARIANT_SCALE + bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP" + depends on HMP_VARIABLE_SCALE && CPU_FREQ + help + Scales the current load contribution in line with the frequency + of the CPU that the task was executed on. + In this version, we use a simple linear scale derived from the + maximum frequency reported by CPUFreq. + Restricting tracked load to be scaled by the CPU's frequency + represents the consumption of possible compute capacity + (rather than consumption of actual instantaneous capacity as + normal) and allows the HMP migration's simple threshold + migration strategy to interact more predictably with CPUFreq's + asynchronous compute capacity changes. + +config SCHED_HMP_LITTLE_PACKING + bool "Small task packing for HMP" + depends on SCHED_HMP + default n + help + Allows the HMP Scheduler to pack small tasks into CPUs in the + smallest HMP domain. + Controlled by two sysfs files in sys/kernel/hmp. + packing_enable: 1 to enable, 0 to disable packing. Default 1. + packing_limit: runqueue load ratio where a RQ is considered + to be full. Default is NICE_0_LOAD * 9/8. + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 depends on SMP - default "4" + # These have to remain sorted largest to smallest + default "8" + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. source kernel/Kconfig.preempt @@ -180,8 +332,25 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" @@ -229,6 +398,25 @@ config SYSVIPC_COMPAT endmenu +menu "Power management options" + +source "kernel/power/Kconfig" + +source "drivers/cpufreq/Kconfig" +config ARCH_SUSPEND_POSSIBLE + def_bool y + +config ARM64_CPU_SUSPEND + def_bool PM_SLEEP + +endmenu + +menu "CPU Power Management" + +source "drivers/cpuidle/Kconfig" + +endmenu + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1a6bfe954d49..e1b0c4601b3e 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -13,6 +13,20 @@ config DEBUG_STACK_USAGE Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T output. +config STRICT_DEVMEM + bool "Filter access to /dev/mem" + depends on MMU + help + If this option is disabled, you allow userspace (root) access to all + of memory, including kernel and userspace memory. Accidental + access to this is obviously disastrous, but specific access can + be used by people debugging the kernel. + + If this option is switched on, the /dev/mem file only allows + userspace access to memory mapped peripherals. + + If in doubt, say Y. + config EARLY_PRINTK bool "Early printk support" default y diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..a254d2c2c0dc 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -20,9 +20,15 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +KBUILD_CPPFLAGS += -mbig-endian +AS += -EB +LD += -EB +else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +endif comma = , @@ -60,6 +66,10 @@ zinstall install: vmlinux dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts dtbs +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..ef388176116d 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,4 +1,7 @@ -dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \ + fvp-base-gicv2-psci.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb +dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts new file mode 100644 index 000000000000..1247ca1200b1 --- /dev/null +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -0,0 +1,26 @@ +/* + * dts file for AppliedMicro (APM) Mustang Board + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/dts-v1/; + +/include/ "apm-storm.dtsi" + +/ { + model = "APM X-Gene Mustang board"; + compatible = "apm,mustang", "apm,xgene-storm"; + + chosen { }; + + memory { + device_type = "memory"; + reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ + }; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi new file mode 100644 index 000000000000..4917f3b81a44 --- /dev/null +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -0,0 +1,364 @@ +/* + * dts file for AppliedMicro (APM) X-Gene Storm SOC + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/ { + compatible = "apm,xgene-storm"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@000 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x000>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@001 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x001>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@100 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@101 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@200 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x200>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@201 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x201>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@300 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x300>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@301 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x301>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + }; + + gic: interrupt-controller@78010000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0x0 0x78010000 0x0 0x1000>, /* GIC Dist */ + <0x0 0x78020000 0x0 0x1000>, /* GIC CPU */ + <0x0 0x78040000 0x0 0x2000>, /* GIC VCPU Control */ + <0x0 0x78060000 0x0 0x2000>; /* GIC VCPU */ + interrupts = <1 9 0xf04>; /* GIC Maintenence IRQ */ + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ + <1 13 0xff01>, /* Non-secure Phys IRQ */ + <1 14 0xff01>, /* Virt IRQ */ + <1 15 0xff01>; /* Hyp IRQ */ + clock-frequency = <50000000>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks { + #address-cells = <2>; + #size-cells = <2>; + ranges; + refclk: refclk { + compatible = "fixed-clock"; + #clock-cells = <1>; + clock-frequency = <100000000>; + clock-output-names = "refclk"; + }; + + pcppll: pcppll@17000100 { + compatible = "apm,xgene-pcppll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "pcppll"; + reg = <0x0 0x17000100 0x0 0x1000>; + clock-output-names = "pcppll"; + type = <0>; + }; + + socpll: socpll@17000120 { + compatible = "apm,xgene-socpll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "socpll"; + reg = <0x0 0x17000120 0x0 0x1000>; + clock-output-names = "socpll"; + type = <1>; + }; + + socplldiv2: socplldiv2 { + compatible = "fixed-factor-clock"; + #clock-cells = <1>; + clocks = <&socpll 0>; + clock-names = "socplldiv2"; + clock-mult = <1>; + clock-div = <2>; + clock-output-names = "socplldiv2"; + }; + + qmlclk: qmlclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "qmlclk"; + reg = <0x0 0x1703C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "qmlclk"; + }; + + ethclk: ethclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "ethclk"; + reg = <0x0 0x17000000 0x0 0x1000>; + reg-names = "div-reg"; + divider-offset = <0x238>; + divider-width = <0x9>; + divider-shift = <0x0>; + clock-output-names = "ethclk"; + }; + + eth8clk: eth8clk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <ðclk 0>; + clock-names = "eth8clk"; + reg = <0x0 0x1702C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "eth8clk"; + }; + + sataphy1clk: sataphy1clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy1clk"; + status = "disabled"; + csr-offset = <0x4>; + csr-mask = <0x00>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy2clk: sataphy1clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy2clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy3clk: sataphy1clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy3clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sata01clk: sata01clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata01clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata23clk: sata23clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata23clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata45clk: sata45clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata45clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + rtcclk: rtcclk@17000000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x17000000 0x0 0x2000>; + reg-names = "csr-reg"; + csr-offset = <0xc>; + csr-mask = <0x2>; + enable-offset = <0x10>; + enable-mask = <0x2>; + clock-output-names = "rtcclk"; + }; + }; + + serial0: serial@1c020000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x1c020000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4c 0x4>; + }; + + phy1: phy@1f21a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f21a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy1clk 0>; + status = "disabled"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; + + phy2: phy@1f22a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f22a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy2clk 0>; + status = "ok"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <1 10 10 2 10 10>; + }; + + phy3: phy@1f23a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f23a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy3clk 0>; + status = "ok"; + apm,tx-boost-gain = <31 31 31 31 31 31>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; + + sata1: sata@1a000000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a000000 0x0 0x1000>, + <0x0 0x1f210000 0x0 0x1000>, + <0x0 0x1f21d000 0x0 0x1000>, + <0x0 0x1f21e000 0x0 0x1000>, + <0x0 0x1f217000 0x0 0x1000>; + interrupts = <0x0 0x86 0x4>; + status = "disabled"; + clocks = <&sata01clk 0>; + phys = <&phy1 0>; + phy-names = "sata-phy"; + }; + + sata2: sata@1a400000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a400000 0x0 0x1000>, + <0x0 0x1f220000 0x0 0x1000>, + <0x0 0x1f22d000 0x0 0x1000>, + <0x0 0x1f22e000 0x0 0x1000>, + <0x0 0x1f227000 0x0 0x1000>; + interrupts = <0x0 0x87 0x4>; + status = "ok"; + clocks = <&sata23clk 0>; + phys = <&phy2 0>; + phy-names = "sata-phy"; + }; + + sata3: sata@1a800000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a800000 0x0 0x1000>, + <0x0 0x1f230000 0x0 0x1000>, + <0x0 0x1f23d000 0x0 0x1000>, + <0x0 0x1f23e000 0x0 0x1000>; + interrupts = <0x0 0x88 0x4>; + status = "ok"; + clocks = <&sata45clk 0>; + phys = <&phy3 0>; + phy-names = "sata-phy"; + }; + + rtc: rtc@10510000 { + compatible = "apm,xgene-rtc"; + reg = <0x0 0x10510000 0x0 0x400>; + interrupts = <0x0 0x46 0x4>; + #clock-cells = <1>; + clocks = <&rtcclk 0>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/clcd-panels.dtsi b/arch/arm64/boot/dts/clcd-panels.dtsi new file mode 100644 index 000000000000..0b0ff6ead4b2 --- /dev/null +++ b/arch/arm64/boot/dts/clcd-panels.dtsi @@ -0,0 +1,52 @@ +/* + * ARM Ltd. Versatile Express + * + */ + +/ { + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts new file mode 100644 index 000000000000..a46be6148b3a --- /dev/null +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2013, ARM Limited. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of ARM nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/dts-v1/; + +/memreserve/ 0x80000000 0x00010000; + +/ { +}; + +/ { + model = "FVP Base"; + compatible = "arm,vfp-base", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0xc4000001>; + cpu_off = <0x84000002>; + cpu_on = <0xc4000003>; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + big0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x0>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x1>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x2>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x3>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little0: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little1: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little2: cpu@102 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x102>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little3: cpu@103 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x103>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + + cpu-map { + cluster0 { + core0 { + cpu = <&big0>; + }; + core1 { + cpu = <&big1>; + }; + core2 { + cpu = <&big2>; + }; + core3 { + cpu = <&big3>; + }; + }; + cluster1 { + core0 { + cpu = <&little0>; + }; + core1 { + cpu = <&little1>; + }; + core2 { + cpu = <&little2>; + }; + core3 { + cpu = <&little3>; + }; + }; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0 0x80000000>, + <0x00000008 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2f000000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2f000000 0 0x10000>, + <0x0 0x2c000000 0 0x2000>, + <0x0 0x2c010000 0 0x2000>, + <0x0 0x2c02F000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 13 0xff01>, + <1 14 0xff01>, + <1 11 0xff01>, + <1 10 0xff01>; + clock-frequency = <100000000>; + }; + + timer@2a810000 { + compatible = "arm,armv7-timer-mem"; + reg = <0x0 0x2a810000 0x0 0x10000>; + clock-frequency = <100000000>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + frame@2a820000 { + frame-number = <0>; + interrupts = <0 25 4>; + reg = <0x0 0x2a820000 0x0 0x10000>; + }; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <0 60 4>, + <0 61 4>, + <0 62 4>, + <0 63 4>; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts new file mode 100644 index 000000000000..9785a14ca604 --- /dev/null +++ b/arch/arm64/boot/dts/juno.dts @@ -0,0 +1,498 @@ +/* + * ARM Ltd. Juno Plaform + * + * Fast Models FVP v2 support + */ + +/dts-v1/; + +#include <dt-bindings/interrupt-controller/arm-gic.h> + +/ { + model = "Juno"; + compatible = "arm,juno", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + aliases { + serial0 = &soc_uart0; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x100>; + enable-method = "psci"; + }; + + cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x101>; + enable-method = "psci"; + }; + + cpu@102 { + device_type = "cpu"; + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x102>; + enable-method = "psci"; + }; + + cpu@103 { + device_type = "cpu"; + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x103>; + enable-method = "psci"; + }; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x0>; + enable-method = "psci"; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x1>; + enable-method = "psci"; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0x0 0x80000000>, + <0x00000008 0x80000000 0x1 0x80000000>; + }; + + /* memory@14000000 { + device_type = "memory"; + reg = <0x00000000 0x14000000 0x0 0x02000000>; + }; */ + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2c010000 0 0x1000>, + <0x0 0x2c02f000 0 0x1000>, + <0x0 0x2c04f000 0 0x2000>, + <0x0 0x2c06f000 0 0x2000>; + interrupts = <GIC_PPI 9 0xf04>; + }; + + msi0: msi@2c1c0000 { + compatible = "arm,gic-msi"; + reg = <0x0 0x2c1c0000 0 0x10000 + 0x0 0x2c1d0000 0 0x10000 + 0x0 0x2c1e0000 0 0x10000 + 0x0 0x2c1f0000 0 0x10000>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <GIC_PPI 13 0xff01>, + <GIC_PPI 14 0xff01>, + <GIC_PPI 11 0xff01>, + <GIC_PPI 10 0xff01>; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <GIC_SPI 60 4>, + <GIC_SPI 61 4>, + <GIC_SPI 62 4>, + <GIC_SPI 63 4>; + }; + + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0xC4000001>; + cpu_off = <0x84000002>; + cpu_on = <0xC4000003>; + migrate = <0xC4000005>; + }; + + pci0: pci@30000000 { + compatible = "arm,pcie-xr3"; + device_type = "pci"; + reg = <0 0x7ff30000 0 0x1000 + 0 0x7ff20000 0 0x10000 + 0 0x40000000 0 0x10000000>; + bus-range = <0 255>; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x01000000 0x0 0x00000000 0x00 0x5ff00000 0x0 0x00100000 + 0x02000000 0x0 0x00000000 0x40 0x00000000 0x0 0x80000000 + 0x42000000 0x0 0x80000000 0x40 0x80000000 0x0 0x80000000>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &gic 0 136 4 + 0 0 0 2 &gic 0 137 4 + 0 0 0 3 &gic 0 138 4 + 0 0 0 4 &gic 0 139 4>; + }; + + scpi: scpi@2b1f0000 { + compatible = "arm,scpi-mhu"; + reg = <0x0 0x2b1f0000 0x0 0x10000>, /* MHU registers */ + <0x0 0x2e000000 0x0 0x10000>; /* Payload area */ + interrupts = <0 36 4>, /* low priority interrupt */ + <0 35 4>, /* high priority interrupt */ + <0 37 4>; /* secure channel interrupt */ + #clock-cells = <1>; + clock-output-names = "a57", "a53", "gpu", "hdlcd0", "hdlcd1"; + }; + + hdlcd0_osc: scpi_osc@3 { + compatible = "arm,scpi-osc"; + #clock-cells = <0>; + clocks = <&scpi 3>; + frequency-range = <23000000 210000000>; + clock-output-names = "pxlclk0"; + }; + + hdlcd1_osc: scpi_osc@4 { + compatible = "arm,scpi-osc"; + #clock-cells = <0>; + clocks = <&scpi 4>; + frequency-range = <23000000 210000000>; + clock-output-names = "pxlclk1"; + }; + + soc_uartclk: refclk72738khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <7273800>; + clock-output-names = "juno:uartclk"; + }; + + soc_refclk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "juno:clk24mhz"; + }; + + mb_eth25mhz: clk25mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000000>; + clock-output-names = "ethclk25mhz"; + }; + + soc_usb48mhz: clk48mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <48000000>; + clock-output-names = "clk48mhz"; + }; + + soc_smc50mhz: clk50mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <50000000>; + clock-output-names = "smc_clk"; + }; + + soc_refclk100mhz: refclk100mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <100000000>; + clock-output-names = "apb_pclk"; + }; + + soc_faxiclk: refclk533mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <533000000>; + clock-output-names = "faxi_clk"; + }; + + soc_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + memory-controller@7ffd0000 { + compatible = "arm,pl354", "arm,primecell"; + reg = <0 0x7ffd0000 0 0x1000>; + interrupts = <0 86 4>, + <0 87 4>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + chip5-memwidth = <16>; + }; + + dma0: dma@0x7ff00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0x0 0x7ff00000 0 0x1000>; + interrupts = <0 95 4>, + <0 88 4>, + <0 89 4>, + <0 90 4>, + <0 91 4>, + <0 108 4>, + <0 109 4>, + <0 110 4>, + <0 111 4>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + clocks = <&soc_faxiclk>; + clock-names = "apb_pclk"; + }; + + soc_uart0: uart@7ff80000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0 0x7ff80000 0x0 0x1000>; + interrupts = <0 83 4>; + clocks = <&soc_uartclk>, <&soc_refclk100mhz>; + clock-names = "uartclk", "apb_pclk"; + dmas = <&dma0 1 + &dma0 2>; + dma-names = "rx", "tx"; + }; + + /* this UART is reserved for secure software. + soc_uart1: uart@7ff70000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0 0x7ff70000 0x0 0x1000>; + interrupts = <0 84 4>; + clocks = <&soc_uartclk>, <&soc_refclk100mhz>; + clock-names = "uartclk", "apb_pclk"; + }; */ + + ulpi_phy: phy@0 { + compatible = "phy-ulpi-generic"; + reg = <0x0 0x94 0x0 0x4>; + phy-id = <0>; + }; + + ehci@7ffc0000 { + compatible = "snps,ehci-h20ahb"; + /* compatible = "arm,h20ahb-ehci"; */ + reg = <0x0 0x7ffc0000 0x0 0x10000>; + interrupts = <0 117 4>; + clocks = <&soc_usb48mhz>; + clock-names = "otg"; + phys = <&ulpi_phy>; + }; + + ohci@0x7ffb0000 { + compatible = "generic-ohci"; + reg = <0x0 0x7ffb0000 0x0 0x10000>; + interrupts = <0 116 4>; + clocks = <&soc_usb48mhz>; + clock-names = "otg"; + }; + + i2c@0x7ffa0000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0x0 0x7ffa0000 0x0 0x1000>; + interrupts = <0 104 4>; + clock-frequency = <400000>; + i2c-sda-hold-time-ns = <500>; + clocks = <&soc_smc50mhz>; + + dvi0: dvi-transmitter@70 { + compatible = "nxp,tda998x"; + reg = <0x70>; + }; + + dvi1: dvi-transmitter@71 { + compatible = "nxp,tda998x"; + reg = <0x71>; + }; + }; + + /* mmci@1c050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x0 0x1c050000 0x0 0x1000>; + interrupts = <0 73 4>, + <0 74 4>; + max-frequency = <12000000>; + vmmc-supply = <&soc_fixed_3v3>; + clocks = <&soc_refclk24mhz>, <&soc_refclk100mhz>; + clock-names = "mclk", "apb_pclk"; + }; */ + + hdlcd@7ff60000 { + compatible = "arm,hdlcd"; + reg = <0 0x7ff60000 0 0x1000>; + interrupts = <0 85 4>; + clocks = <&hdlcd0_osc>; + clock-names = "pxlclk"; + i2c-slave = <&dvi0>; + + /* display-timings { + native-mode = <&timing0>; + timing0: timing@0 { + /* 1024 x 768 framebufer, standard VGA timings * / + clock-frequency = <65000>; + hactive = <1024>; + vactive = <768>; + hfront-porch = <24>; + hback-porch = <160>; + hsync-len = <136>; + vfront-porch = <3>; + vback-porch = <29>; + vsync-len = <6>; + }; + }; */ + }; + + hdlcd@7ff50000 { + compatible = "arm,hdlcd"; + reg = <0 0x7ff50000 0 0x1000>; + interrupts = <0 93 4>; + clocks = <&hdlcd1_osc>; + clock-names = "pxlclk"; + i2c-slave = <&dvi1>; + + display-timings { + native-mode = <&timing1>; + timing1: timing@1 { + /* 1024 x 768 framebufer, standard VGA timings */ + clock-frequency = <65000>; + hactive = <1024>; + vactive = <768>; + hfront-porch = <24>; + hback-porch = <160>; + hsync-len = <136>; + vfront-porch = <3>; + vback-porch = <29>; + vsync-len = <6>; + }; + }; + }; + + smb { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 15>; + interrupt-map = <0 0 0 &gic 0 68 4>, + <0 0 1 &gic 0 69 4>, + <0 0 2 &gic 0 70 4>, + <0 0 3 &gic 0 160 4>, + <0 0 4 &gic 0 161 4>, + <0 0 5 &gic 0 162 4>, + <0 0 6 &gic 0 163 4>, + <0 0 7 &gic 0 164 4>, + <0 0 8 &gic 0 165 4>, + <0 0 9 &gic 0 166 4>, + <0 0 10 &gic 0 167 4>, + <0 0 11 &gic 0 168 4>, + <0 0 12 &gic 0 169 4>; + + motherboard { + model = "V2M-Juno"; + arm,hbi = <0x252>; + arm,vexpress,site = <0>; + arm,v2m-memory-map = "rs1"; + compatible = "arm,vexpress,v2p-p1", "simple-bus"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + usb@5,00000000 { + compatible = "nxp,usb-isp1763"; + reg = <5 0x00000000 0x20000>; + bus-width = <16>; + interrupts = <4>; + }; + + ethernet@2,00000000 { + compatible = "smsc,lan9118", "smsc,lan9115"; + reg = <2 0x00000000 0x10000>; + interrupts = <3>; + phy-mode = "mii"; + reg-io-width = <4>; + smsc,irq-active-high; + smsc,irq-push-pull; + clocks = <&mb_eth25mhz>; + vdd33a-supply = <&soc_fixed_3v3>; /* change this */ + vddvario-supply = <&soc_fixed_3v3>; /* and this */ + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <8>; + clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <8>; + clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x10000>; + interrupts = <7>; + clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x10000>; + interrupts = <9>; + clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>; + clock-names = "timclken1", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x10000>; + interrupts = <9>; + clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>; + clock-names = "timclken1", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x10000>; + interrupts = <0>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + }; + }; + }; + }; +}; diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts index 572005ea2217..28ed4ba3391a 100644 --- a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts +++ b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts @@ -27,37 +27,70 @@ serial3 = &v2m_serial3; }; + psci { + compatible = "arm,psci"; + method = "smc"; + /* + * Function IDs usage and compliancy with PSCI v0.2 still + * under discussion. Current IDs should be considered + * temporary for demonstration purposes. + */ + cpu_suspend = <0x84000001>; + cpu_off = <0x84000002>; + cpu_on = <0x84000003>; + }; + cpus { #address-cells = <2>; #size-cells = <0>; + idle-states { + entry-method = "arm,psci"; + + CPU_SLEEP_0: cpu-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x0010000>; + entry-latency-us = <40>; + exit-latency-us = <100>; + min-residency-us = <150>; + }; + + CLUSTER_SLEEP_0: cluster-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x1010000>; + entry-latency-us = <500>; + exit-latency-us = <1000>; + min-residency-us = <2500>; + }; + }; + cpu@0 { device_type = "cpu"; compatible = "arm,armv8"; reg = <0x0 0x0>; - enable-method = "spin-table"; - cpu-release-addr = <0x0 0x8000fff8>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; cpu@1 { device_type = "cpu"; compatible = "arm,armv8"; reg = <0x0 0x1>; - enable-method = "spin-table"; - cpu-release-addr = <0x0 0x8000fff8>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; cpu@2 { device_type = "cpu"; compatible = "arm,armv8"; reg = <0x0 0x2>; - enable-method = "spin-table"; - cpu-release-addr = <0x0 0x8000fff8>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; cpu@3 { device_type = "cpu"; compatible = "arm,armv8"; reg = <0x0 0x3>; - enable-method = "spin-table"; - cpu-release-addr = <0x0 0x8000fff8>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; }; @@ -157,3 +190,5 @@ /include/ "rtsm_ve-motherboard.dtsi" }; }; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi index b45e5f39f577..b683d4703582 100644 --- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi +++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi @@ -182,6 +182,15 @@ interrupts = <14>; clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>; clock-names = "clcdclk", "apb_pclk"; + mode = "XVGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + + virtio_block@0130000 { + compatible = "virtio,mmio"; + reg = <0x130000 0x200>; + interrupts = <42>; }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8d9696adb440..8e323147c375 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -19,13 +18,17 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_XGENE=y CONFIG_SMP=y +CONFIG_PREEMPT=y CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -42,29 +45,42 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_BLK_DEV is not set +CONFIG_DMA_CMA=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_SMC91X=y +CONFIG_SMSC911X=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y @@ -86,3 +102,4 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DMA_CMA=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..bc5da00f8d84 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += delay.h generic-y += div64.h generic-y += dma.h generic-y += emergency-restart.h +generic-y += early_ioremap.h generic-y += errno.h generic-y += ftrace.h generic-y += hw_irq.h @@ -26,10 +27,10 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h generic-y += pci.h -generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h generic-y += resource.h +generic-y += rwsem.h generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index d56ed11ba9a3..cb2be3b86c6b 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -97,19 +97,49 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline void __cpuinit arch_counter_set_user_access(void) +static inline u32 arch_timer_get_cntkctl(void) { u32 cntkctl; - - /* Disable user access to the timers and the physical counter. */ asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - cntkctl &= ~((3 << 8) | (1 << 0)); + return cntkctl; +} - /* Enable user access to the virtual counter and frequency. */ - cntkctl |= (1 << 1); +static inline void arch_timer_set_cntkctl(u32 cntkctl) +{ asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } +static inline void __cpuinit arch_counter_set_user_access(void) +{ + u32 cntkctl = arch_timer_get_cntkctl(); + + /* Disable user access to the timers and the physical counter */ + /* Also disable virtual event stream */ + cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN + | ARCH_TIMER_USR_VT_ACCESS_EN + | ARCH_TIMER_VIRT_EVT_EN + | ARCH_TIMER_USR_PCT_ACCESS_EN); + + /* Enable user access to the virtual counter */ + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + + arch_timer_set_cntkctl(cntkctl); +} + +static inline void arch_timer_evtstrm_enable(int divider) +{ + u32 cntkctl = arch_timer_get_cntkctl(); + cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; + /* Set the divider and enable virtual event stream */ + cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) + | ARCH_TIMER_VIRT_EVT_EN; + arch_timer_set_cntkctl(cntkctl); + elf_hwcap |= HWCAP_EVTSTRM; +#ifdef CONFIG_COMPAT + compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; +#endif +} + static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5aceb83b3f5c..fd3e3924041b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -115,3 +115,34 @@ lr .req x30 // link register .align 7 b \label .endm + +/* + * Select code when configured for BE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_BE(code...) code +#else +#define CPU_BE(code...) +#endif + +/* + * Select code when configured for LE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_LE(code...) +#else +#define CPU_LE(code...) code +#endif + +/* + * Define a macro that constructs a 64-bit value by concatenating two + * 32-bit registers. Note that on big endian systems the order of the + * registers is swapped. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + .macro regs_to_64, rd, lbits, hbits +#else + .macro regs_to_64, rd, hbits, lbits +#endif + orr \rd, \lbits, \hbits, lsl #32 + .endm diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 836364468571..736c5916d367 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -70,7 +69,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -86,8 +85,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_sub_return(int i, atomic_t *v) @@ -102,7 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -121,7 +119,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); return oldval; } @@ -173,7 +171,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) */ #define ATOMIC64_INIT(i) { (i) } -#define atomic64_read(v) (*(volatile long long *)&(v)->counter) +#define atomic64_read(v) (*(volatile long *)&(v)->counter) #define atomic64_set(v,i) (((v)->counter) = (i)) static inline void atomic64_add(u64 i, atomic64_t *v) @@ -187,8 +185,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_add_return(long i, atomic64_t *v) @@ -203,7 +200,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -219,8 +216,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_sub_return(long i, atomic64_t *v) @@ -235,7 +231,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -254,7 +250,7 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); return oldval; } diff --git a/arch/arm64/include/asm/bL_switcher.h b/arch/arm64/include/asm/bL_switcher.h new file mode 100644 index 000000000000..2bee500b7f54 --- /dev/null +++ b/arch/arm64/include/asm/bL_switcher.h @@ -0,0 +1,54 @@ +/* + * Based on the stubs for the ARM implementation which is: + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/notifier.h> +#include <linux/types.h> + +typedef void (*bL_switch_completion_handler)(void *cookie); + +static inline int bL_switch_request(unsigned int cpu, + unsigned int new_cluster_id) +{ + return -ENOTSUPP; +} + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } + +#endif diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index d4a63338a53c..c98d0a88916a 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,9 +25,10 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") -#define dsb() asm volatile("dsb sy" : : : "memory") +#define dmb(opt) asm volatile("dmb sy" : : : "memory") +#define dsb(opt) asm volatile("dsb sy" : : : "memory") -#define mb() dsb() +#define mb() dsb(sy) #define rmb() asm volatile("dsb ld" : : : "memory") #define wmb() asm volatile("dsb st" : : : "memory") diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 0c13554965b8..a5176cf32dad 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -85,6 +85,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, } /* + * Cache maintenance functions used by the DMA API. No to be used directly. + */ +extern void __dma_map_area(const void *, size_t, int); +extern void __dma_unmap_area(const void *, size_t, int); +extern void __dma_flush_range(const void *, const void *); + +/* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user * space" model to handle this. @@ -116,7 +123,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); - dsb(); + dsb(ish); } #define flush_dcache_mmap_lock(mapping) \ @@ -124,9 +131,6 @@ static inline void __flush_icache_all(void) #define flush_dcache_mmap_unlock(mapping) \ spin_unlock_irq(&(mapping)->tree_lock) -#define flush_icache_user_range(vma,page,addr,len) \ - flush_dcache_page(page) - /* * We don't appear to need to do anything here. In fact, if we did, we'd * duplicate cache flushing elsewhere performed by flush_dcache_page(). @@ -146,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) * set_pte_at() called from vmap_pte_range() does not * have a DSB after cleaning the cache line. */ - dsb(); + dsb(ish); } static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e73a38..014328e80a4b 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -34,7 +34,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 2: asm volatile("// __xchg2\n" @@ -43,7 +43,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 4: asm volatile("// __xchg4\n" @@ -52,7 +52,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 8: asm volatile("// __xchg8\n" @@ -61,7 +61,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; default: BUILD_BUG(); @@ -71,7 +71,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size } #define xchg(ptr,x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) @@ -158,19 +163,27 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, return ret; } -#define cmpxchg(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) - -#define cmpxchg_local(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) + +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) +#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) + #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af807ef0f..253e33bc94fb 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -26,7 +26,11 @@ #include <linux/ptrace.h> #define COMPAT_USER_HZ 100 +#ifdef __AARCH64EB__ +#define COMPAT_UTS_MACHINE "armv8b\0\0" +#else #define COMPAT_UTS_MACHINE "armv8l\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; @@ -73,13 +77,23 @@ struct compat_timeval { }; struct compat_stat { +#ifdef __AARCH64EB__ + short st_dev; + short __pad1; +#else compat_dev_t st_dev; +#endif compat_ino_t st_ino; compat_mode_t st_mode; compat_ushort_t st_nlink; __compat_uid16_t st_uid; __compat_gid16_t st_gid; +#ifdef __AARCH64EB__ + short st_rdev; + short __pad2; +#else compat_dev_t st_rdev; +#endif compat_off_t st_size; compat_off_t st_blksize; compat_off_t st_blocks; @@ -214,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (current_pt_regs()->compat_sp) +#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) static inline void __user *arch_compat_alloc_user_space(long len) { @@ -291,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread) #else /* !CONFIG_COMPAT */ -static inline int is_compat_task(void) -{ - return 0; -} - static inline int is_compat_thread(struct thread_info *thread) { return 0; diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h new file mode 100644 index 000000000000..152413076503 --- /dev/null +++ b/arch/arm64/include/asm/cpu_ops.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include <linux/init.h> +#include <linux/threads.h> + +struct device_node; + +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the property as appears in a devicetree cpu node's + * enable-method property. + * @cpu_init: Reads any data necessary for a specific enable-method from the + * devicetree, for a given cpu node and proposed logical id. + * @cpu_prepare: Early one-time preparation step for a cpu. If there is a + * mechanism for doing so, tests whether it is possible to boot + * the given CPU. + * @cpu_boot: Boots a cpu into the kernel. + * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * synchronisation. Called from the cpu being booted. + * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific + * reason, which will cause the hot unplug to be aborted. Called + * from the cpu to be killed. + * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the + * cpu being killed. + * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing + * to wrong parameters or error conditions. Called from the + * CPU being suspended. Must be called with IRQs disabled. + */ +struct cpu_operations { + const char *name; + int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_prepare)(unsigned int); + int (*cpu_boot)(unsigned int); + void (*cpu_postboot)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(unsigned int cpu); + void (*cpu_die)(unsigned int cpu); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + int (*cpu_suspend)(unsigned long); +#endif +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +extern int __init cpu_read_ops(struct device_node *dn, int cpu); +extern void __init cpu_read_bootcpu_ops(void); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..c404fb0df3a6 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -16,32 +16,35 @@ #ifndef __ASM_CPUTYPE_H #define __ASM_CPUTYPE_H -#define ID_MIDR_EL1 "midr_el1" -#define ID_MPIDR_EL1 "mpidr_el1" -#define ID_CTR_EL0 "ctr_el0" - -#define ID_AA64PFR0_EL1 "id_aa64pfr0_el1" -#define ID_AA64DFR0_EL1 "id_aa64dfr0_el1" -#define ID_AA64AFR0_EL1 "id_aa64afr0_el1" -#define ID_AA64ISAR0_EL1 "id_aa64isar0_el1" -#define ID_AA64MMFR0_EL1 "id_aa64mmfr0_el1" - #define INVALID_HWID ULONG_MAX #define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_LEVEL_BITS_SHIFT 3 +#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_LEVEL_SHIFT(level) \ + (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) + #define read_cpuid(reg) ({ \ u64 __val; \ - asm("mrs %0, " reg : "=r" (__val)); \ + asm("mrs %0, " #reg : "=r" (__val)); \ __val; \ }) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 #define ARM_CPU_PART_CORTEX_A57 0xD070 +#define APM_CPU_PART_POTENZA 0x0000 + #ifndef __ASSEMBLY__ /* @@ -51,12 +54,12 @@ */ static inline u32 __attribute_const__ read_cpuid_id(void) { - return read_cpuid(ID_MIDR_EL1); + return read_cpuid(MIDR_EL1); } static inline u64 __attribute_const__ read_cpuid_mpidr(void) { - return read_cpuid(ID_MPIDR_EL1); + return read_cpuid(MPIDR_EL1); } static inline unsigned int __attribute_const__ read_cpuid_implementor(void) @@ -71,7 +74,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void) static inline u32 __attribute_const__ read_cpuid_cachetype(void) { - return read_cpuid(ID_CTR_EL0); + return read_cpuid(CTR_EL0); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..7c951a510b54 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -26,6 +26,53 @@ #define DBG_ESR_EVT_HWWP 0x2 #define DBG_ESR_EVT_BRK 0x6 +/* + * Break point instruction encoding + */ +#define BREAK_INSTR_SIZE 4 + +/* + * ESR values expected for dynamic and compile time BRK instruction + */ +#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff)) + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgbd are 0x400 - 0x7ff + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + */ +#define KGDB_DYN_DGB_BRK_IMM 0x400 +#define KDBG_COMPILED_DBG_BRK_IMM 0x401 + +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * Extract byte from BRK instruction + */ +#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \ + ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff) + +/* + * Extract byte from BRK #imm16 + */ +#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \ + (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff) + +#define KGDB_DYN_DGB_BRK_BYTE(x) \ + (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x)) + +#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0) +#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1) +#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2) +#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3) + +#define CACHE_FLUSH_IS_SAFE 1 + enum debug_el { DBG_ACTIVE_EL0 = 0, DBG_ACTIVE_EL1, @@ -43,25 +90,29 @@ enum debug_el { #ifndef __ASSEMBLY__ struct task_struct; -#define local_dbg_save(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "mrs %0, daif // local_dbg_save\n" \ - "msr daifset, #8" \ - : "=r" (flags) : : "memory"); \ - } while (0) - -#define local_dbg_restore(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "msr daif, %0 // local_dbg_restore\n" \ - : : "r" (flags) : "memory"); \ - } while (0) - #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ +#define DBG_HOOK_HANDLED 0 +#define DBG_HOOK_ERROR 1 + +struct step_hook { + struct list_head node; + int (*fn)(struct pt_regs *regs, unsigned int esr); +}; + +void register_step_hook(struct step_hook *hook); +void unregister_step_hook(struct step_hook *hook); + +struct break_hook { + struct list_head node; + u32 esr_val; + u32 esr_mask; + int (*fn)(struct pt_regs *regs, unsigned int esr); +}; + +void register_break_hook(struct break_hook *hook); +void unregister_break_hook(struct break_hook *hook); + u8 debug_monitors_arch(void); void enable_debug_monitors(enum debug_el el); @@ -83,6 +134,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 0d8453c755a8..cf98b362094b 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -18,6 +18,9 @@ struct dev_archdata { struct dma_map_ops *dma_ops; +#ifdef CONFIG_IOMMU_API + void *iommu; /* private IOMMU data */ +#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..14c4c0ca7f2a --- /dev/null +++ b/arch/arm64/include/asm/dma-contiguous.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ASM_DMA_CONTIGUOUS_H +#define _ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_DMA_CMA + +#include <linux/types.h> + +static inline void +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } + +#endif +#endif + +#endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 994776894198..00a41aab4a37 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -25,7 +25,10 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK +#define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops coherent_swiotlb_dma_ops; +extern struct dma_map_ops noncoherent_swiotlb_dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { @@ -35,6 +38,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dev->archdata.dma_ops; } +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} + #include <asm-generic/dma-mapping-common.h> static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) @@ -81,8 +89,12 @@ static inline void dma_mark_clean(void *addr, size_t size) { } -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *vaddr; @@ -90,13 +102,14 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) return vaddr; - vaddr = ops->alloc(dev, size, dma_handle, flags, NULL); + vaddr = ops->alloc(dev, size, dma_handle, flags, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); return vaddr; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dev_addr) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dev_addr, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -104,7 +117,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, dev_addr); - ops->free(dev, size, vaddr, dev_addr, NULL); + ops->free(dev, size, vaddr, dev_addr, attrs); } /* diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fe32c0e4ac01..01d3aab64b79 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fpsimd_state elf_fpregset_t; -#define EM_AARCH64 183 - /* * AArch64 static relocation types. */ @@ -92,11 +90,24 @@ typedef struct user_fpsimd_state elf_fpregset_t; * These are used to set parameters in the core dumps. */ #define ELF_CLASS ELFCLASS64 +#ifdef __AARCH64EB__ +#define ELF_DATA ELFDATA2MSB +#else #define ELF_DATA ELFDATA2LSB +#endif #define ELF_ARCH EM_AARCH64 +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ #define ELF_PLATFORM_SIZE 16 +#ifdef __AARCH64EB__ +#define ELF_PLATFORM ("aarch64_be") +#else #define ELF_PLATFORM ("aarch64") +#endif /* * This is used to ensure we don't load something for the wrong architecture. @@ -151,8 +162,12 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_COMPAT -#define EM_ARM 40 + +#ifdef __AARCH64EB__ +#define COMPAT_ELF_PLATFORM ("v8b") +#else #define COMPAT_ELF_PLATFORM ("v8l") +#endif #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 78834123a32e..c4a7f940b387 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -42,7 +42,7 @@ #define ESR_EL1_EC_SP_ALIGN (0x26) #define ESR_EL1_EC_FP_EXC32 (0x28) #define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERRROR (0x2F) +#define ESR_EL1_EC_SERROR (0x2F) #define ESR_EL1_EC_BREAKPT_EL0 (0x30) #define ESR_EL1_EC_BREAKPT_EL1 (0x31) #define ESR_EL1_EC_SOFTSTP_EL0 (0x32) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h new file mode 100644 index 000000000000..5f7bfe6df723 --- /dev/null +++ b/arch/arm64/include/asm/fixmap.h @@ -0,0 +1,67 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * Copyright (C) 2013 Mark Salter <msalter@redhat.com> + * + * Adapted from arch/x86_64 version. + * + */ + +#ifndef _ASM_ARM64_FIXMAP_H +#define _ASM_ARM64_FIXMAP_H + +#ifndef __ASSEMBLY__ +#include <linux/kernel.h> +#include <asm/page.h> + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * + * These 'compile-time allocated' memory buffers are + * page-sized. Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + */ +enum fixed_addresses { + FIX_EARLYCON_MEM_BASE, + __end_of_permanent_fixed_addresses, + + /* + * Temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define NR_FIX_BTMAPS 4 +#else +#define NR_FIX_BTMAPS 64 +#endif +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) + +extern void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#define __set_fixmap __early_set_fixmap + +#include <asm-generic/fixmap.h> + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_ARM64_FIXMAP_H */ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h new file mode 100644 index 000000000000..c5534facf941 --- /dev/null +++ b/arch/arm64/include/asm/ftrace.h @@ -0,0 +1,59 @@ +/* + * arch/arm64/include/asm/ftrace.h + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_FTRACE_H +#define __ASM_FTRACE_H + +#include <asm/insn.h> + +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE + +#ifndef __ASSEMBLY__ +#include <linux/compat.h> + +extern void _mcount(unsigned long); +extern void *return_address(unsigned int); + +struct dyn_arch_ftrace { + /* No extra data needed for arm64 */ +}; + +extern unsigned long ftrace_graph_call; + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. + */ + return addr; +} + +#define ftrace_return_address(n) return_address(n) + +/* + * Because AArch32 mode does not share the same syscall table with AArch64, + * tracing compat syscalls may result in reporting bogus syscalls or even + * hang-up, so just do not trace them. + * See kernel/trace/trace_syscalls.c + * + * x86 code says: + * If the user realy wants these, then they should use the + * raw syscall tracepoints with filtering. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} +#endif /* ifndef __ASSEMBLY__ */ + +#endif /* __ASM_FTRACE_H */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index c582fa316366..6230baba7869 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,6 +30,7 @@ " cbnz %w3, 1b\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ +" .align 2\n" \ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ @@ -39,7 +40,7 @@ " .popsection\n" \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "memory") static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -126,7 +127,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .popsection\n" : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) - : "cc", "memory"); + : "memory"); *uval = val; return ret; diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 990c051e7829..ae4801d77514 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 4 +#define NR_IPI 5 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include <asm-generic/hugetlb.h> +#include <asm/page.h> + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..024c46183c3c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -30,6 +30,13 @@ #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_EVTSTRM (1 << 21) + +#define COMPAT_HWCAP2_AES (1 << 0) +#define COMPAT_HWCAP2_PMULL (1 << 1) +#define COMPAT_HWCAP2_SHA1 (1 << 2) +#define COMPAT_HWCAP2_SHA2 (1 << 3) +#define COMPAT_HWCAP2_CRC32 (1 << 4) #ifndef __ASSEMBLY__ /* @@ -37,12 +44,13 @@ * instruction set this cpu supports. */ #define ELF_HWCAP (elf_hwcap) -#define COMPAT_ELF_HWCAP (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ - COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP (compat_elf_hwcap) +#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; +#endif + +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h new file mode 100644 index 000000000000..62e7b8bcd2dc --- /dev/null +++ b/arch/arm64/include/asm/insn.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_INSN_H +#define __ASM_INSN_H + +#include <linux/types.h> + +/* A64 instructions are always 32 bits. */ +#define AARCH64_INSN_SIZE 4 + +#ifndef __ASSEMBLY__ + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section C3.1 "A64 instruction index by encoding": + * AArch64 main encoding table + * Bit position + * 28 27 26 25 Encoding Group + * 0 0 - - Unallocated + * 1 0 0 - Data processing, immediate + * 1 0 1 - Branch, exception generation and system instructions + * - 1 - 0 Loads and stores + * - 1 0 1 Data processing - register + * 0 1 1 1 Data processing - SIMD and floating point + * 1 1 1 1 Data processing - SIMD and floating point + * "-" means "don't care" + */ +enum aarch64_insn_encoding_class { + AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */ + AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */ + AARCH64_INSN_CLS_DP_REG, /* Data processing - register */ + AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */ + AARCH64_INSN_CLS_LDST, /* Loads and stores */ + AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and + * system instructions */ +}; + +enum aarch64_insn_hint_op { + AARCH64_INSN_HINT_NOP = 0x0 << 5, + AARCH64_INSN_HINT_YIELD = 0x1 << 5, + AARCH64_INSN_HINT_WFE = 0x2 << 5, + AARCH64_INSN_HINT_WFI = 0x3 << 5, + AARCH64_INSN_HINT_SEV = 0x4 << 5, + AARCH64_INSN_HINT_SEVL = 0x5 << 5, +}; + +enum aarch64_insn_imm_type { + AARCH64_INSN_IMM_ADR, + AARCH64_INSN_IMM_26, + AARCH64_INSN_IMM_19, + AARCH64_INSN_IMM_16, + AARCH64_INSN_IMM_14, + AARCH64_INSN_IMM_12, + AARCH64_INSN_IMM_9, + AARCH64_INSN_IMM_MAX +}; + +enum aarch64_insn_branch_type { + AARCH64_INSN_BRANCH_NOLINK, + AARCH64_INSN_BRANCH_LINK, +}; + +#define __AARCH64_INSN_FUNCS(abbr, mask, val) \ +static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ +{ return (code & (mask)) == (val); } \ +static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ +{ return (val); } + +__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) +__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) +__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) +__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) +__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) +__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) + +#undef __AARCH64_INSN_FUNCS + +bool aarch64_insn_is_nop(u32 insn); + +int aarch64_insn_read(void *addr, u32 *insnp); +int aarch64_insn_write(void *addr, u32 insn); +enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm); +u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type); +u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op); +u32 aarch64_insn_gen_nop(void); + +bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); + +int aarch64_insn_patch_text_nosync(void *addr, u32 insn); +int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); +int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 2e12258aa7e4..e1018b75c954 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -26,6 +26,7 @@ #include <asm/byteorder.h> #include <asm/barrier.h> #include <asm/pgtable.h> +#include <asm/early_ioremap.h> /* * Generic IO read/write. These perform native-endian accesses. @@ -118,7 +119,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define IO_SPACE_LIMIT 0xffff -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_2M)) +#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) static inline u8 inb(unsigned long addr) { @@ -225,18 +226,11 @@ extern void __memset_io(volatile void __iomem *, int, size_t); extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); extern void __iounmap(volatile void __iomem *addr); -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC)) - #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) #define iounmap __iounmap -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) - #define ARCH_HAS_IOREMAP_WC #include <asm-generic/iomap.h> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 0332fc077f6e..e1f7ecdde11f 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,7 @@ #include <asm-generic/irq.h> extern void (*handle_arch_irq)(struct pt_regs *); +extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index aa11943b8502..0ed52c691868 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -87,5 +87,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return flags & PSR_I_BIT; } +/* + * save and restore debug state + */ +#define local_dbg_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "mrs %0, daif // local_dbg_save\n" \ + "msr daifset, #8" \ + : "=r" (flags) : : "memory"); \ + } while (0) + +#define local_dbg_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "msr daif, %0 // local_dbg_restore\n" \ + : : "r" (flags) : "memory"); \ + } while (0) + +#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") +#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") + #endif #endif diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h new file mode 100644 index 000000000000..076a1c714049 --- /dev/null +++ b/arch/arm64/include/asm/jump_label.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/include/asm/jump_label.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_JUMP_LABEL_H +#define __ASM_JUMP_LABEL_H +#include <linux/types.h> +#include <asm/insn.h> + +#ifdef __KERNEL__ + +#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE + +static __always_inline bool arch_static_branch(struct static_key *key) +{ + asm goto("1: nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(key) : : l_yes); + + return false; +l_yes: + return true; +} + +#endif /* __KERNEL__ */ + +typedef u64 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h new file mode 100644 index 000000000000..3c8aafc1082f --- /dev/null +++ b/arch/arm64/include/asm/kgdb.h @@ -0,0 +1,84 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/include/kgdb.h + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM_KGDB_H +#define __ARM_KGDB_H + +#include <linux/ptrace.h> +#include <asm/debug-monitors.h> + +#ifndef __ASSEMBLY__ + +static inline void arch_kgdb_breakpoint(void) +{ + asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM)); +} + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; + +#endif /* !__ASSEMBLY__ */ + +/* + * gdb is expecting the following registers layout. + * + * General purpose regs: + * r0-r30: 64 bit + * sp,pc : 64 bit + * pstate : 64 bit + * Total: 34 + * FPU regs: + * f0-f31: 128 bit + * Total: 32 + * Extra regs + * fpsr & fpcr: 32 bit + * Total: 2 + * + */ + +#define _GP_REGS 34 +#define _FP_REGS 32 +#define _EXTRA_REGS 2 +/* + * general purpose registers size in bytes. + * pstate is only 4 bytes. subtract 4 bytes + */ +#define GP_REG_BYTES (_GP_REGS * 8) +#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ + +#define BUFMAX 2048 + +/* + * Number of bytes required for gdb_regs buffer. + * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ + +#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \ + (_EXTRA_REGS * 4)) + +#endif /* __ASM_KGDB_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 381f556b664e..212ded1662bf 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -33,18 +33,23 @@ #define UL(x) _AC(x, UL) /* - * PAGE_OFFSET - the virtual address of the start of the kernel image. + * PAGE_OFFSET - the virtual address of the start of the kernel image (top + * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ -#define PAGE_OFFSET UL(0xffffffc000000000) +#ifdef CONFIG_ARM64_64K_PAGES +#define VA_BITS (42) +#else +#define VA_BITS (39) +#endif +#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) -#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M) -#define VA_BITS (39) +#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT @@ -127,6 +132,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x)) /* * virt_to_page(k) convert a _valid_ virtual address to struct page * diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 2494fc01896a..aff0292c8f4d 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -22,10 +22,14 @@ typedef struct { void *vdso; } mm_context_t; +#define INIT_MM_CONTEXT(name) \ + .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), + #define ASID(mm) ((mm)->context.id & 0xffff) extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); +extern void init_mem_pgprot(void); #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index e2bc385adb6b..a9eee33dfa62 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && - !cpumask_test_cpu(cpu, mm_cpumask(next))) - __flush_icache_all(); -#endif if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h new file mode 100644 index 000000000000..453a179469a3 --- /dev/null +++ b/arch/arm64/include/asm/percpu.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PERCPU_H +#define __ASM_PERCPU_H + +#ifdef CONFIG_SMP + +static inline void set_my_cpu_offset(unsigned long off) +{ + asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + register unsigned long *sp asm ("sp"); + + /* + * We want to allow caching the value, so avoid using volatile and + * instead use a fake stack read to hazard against barrier(). + */ + asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp)); + + return off; +} +#define __my_cpu_offset __my_cpu_offset() + +#else /* !CONFIG_SMP */ + +#define set_my_cpu_offset(x) do { } while (0) + +#endif /* CONFIG_SMP */ + +#include <asm-generic/percpu.h> + +#endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h index 0a8ed3f94e93..2593b490c56a 100644 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h @@ -21,10 +21,10 @@ * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each * entry representing 512MB. The user and kernel address spaces are limited to - * 512GB and therefore we only use 1024 entries in the PGD. + * 4TB in the 64KB page configuration. */ #define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 1024 +#define PTRS_PER_PGD 8192 /* * PGDIR_SHIFT determines the size a top-level page table entry can map. diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h index 3c3ca7d361e4..5f101e63dfc1 100644 --- a/arch/arm64/include/asm/pgtable-2level-types.h +++ b/arch/arm64/include/asm/pgtable-2level-types.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_2LEVEL_TYPES_H #define __ASM_PGTABLE_2LEVEL_TYPES_H +#include <asm/types.h> + typedef u64 pteval_t; typedef u64 pgdval_t; typedef pgdval_t pmdval_t; diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h index 4489615f14a9..4e94424938a4 100644 --- a/arch/arm64/include/asm/pgtable-3level-types.h +++ b/arch/arm64/include/asm/pgtable-3level-types.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_3LEVEL_TYPES_H #define __ASM_PGTABLE_3LEVEL_TYPES_H +#include <asm/types.h> + typedef u64 pteval_t; typedef u64 pmdval_t; typedef u64 pgdval_t; diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..d25991747650 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,16 +25,27 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) @@ -53,6 +64,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ @@ -92,5 +104,6 @@ #define TCR_TG1_64K (UL(1) << 30) #define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) +#define TCR_TBI0 (UL(1) << 37) #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3a710d7b14ce..a184bc132131 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,15 +25,16 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) +#define PTE_WRITE (_AT(pteval_t, 1) << 57) +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. */ -#define VMALLOC_START UL(0xffffff8000000000) +#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) @@ -51,60 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) -/* - * The pgprot_* and protection_map entries will be fixed up at runtime to - * include the cachable and bufferable bits based on memory policy, as well as - * any architecture dependent bits like global/ASID and SMP shared mapping - * bits. - */ -#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF - -extern pgprot_t pgprot_default; - -#define __pgprot_modify(prot,mask,bits) \ - __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) +#ifdef CONFIG_SMP +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#else +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) +#endif -#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) - -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) - -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) - -#endif /* __ASSEMBLY__ */ - -#define __P000 __PAGE_NONE -#define __P001 __PAGE_READONLY -#define __P010 __PAGE_COPY -#define __P011 __PAGE_COPY -#define __P100 __PAGE_READONLY_EXEC -#define __P101 __PAGE_READONLY_EXEC -#define __P110 __PAGE_COPY_EXEC -#define __P111 __PAGE_COPY_EXEC - -#define __S000 __PAGE_NONE -#define __S001 __PAGE_READONLY -#define __S010 __PAGE_SHARED -#define __S011 __PAGE_SHARED -#define __S100 __PAGE_READONLY_EXEC -#define __S101 __PAGE_READONLY_EXEC -#define __S110 __PAGE_SHARED_EXEC -#define __S111 __PAGE_SHARED_EXEC +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) + +#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) + +#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) + +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC -#ifndef __ASSEMBLY__ /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -119,7 +119,7 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -129,26 +129,57 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) -#define PTE_BIT_FUNC(fn,op) \ -static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) &= ~PTE_WRITE; + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) |= PTE_WRITE; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~PTE_AF; + return pte; +} -PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); -PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); -PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); -PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); -PTE_BIT_FUNC(mkold, &= ~PTE_AF); -PTE_BIT_FUNC(mkyoung, |= PTE_AF); -PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= PTE_AF; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= PTE_SPECIAL; + return pte; +} static inline void set_pte(pte_t *ptep, pte_t pte) { @@ -163,8 +194,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (!pte_dirty(pte)) - pte = pte_wrprotect(pte); + if (pte_dirty(pte) && pte_write(pte)) + pte_val(pte) &= ~PTE_RDONLY; + else + pte_val(pte) |= PTE_RDONLY; } set_pte(ptep, pte); @@ -173,11 +206,69 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. + */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +/* + * THP definitions. + */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd)) +#endif + +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK)) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -200,7 +291,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; - dsb(); + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -230,7 +321,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; - dsb(); + dsb(ishst); } static inline void pud_clear(pud_t *pudp) @@ -263,16 +354,21 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #endif /* Find an entry in the third-level page table.. */ -#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); +} + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; @@ -284,15 +380,17 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; * bits 0-1: present (must be zero) * bit 2: PTE_FILE * bits 3-8: swap type - * bits 9-63: swap offset + * bits 9-57: swap offset */ #define __SWP_TYPE_SHIFT 3 #define __SWP_TYPE_BITS 6 +#define __SWP_OFFSET_BITS 49 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) -#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) @@ -300,7 +398,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Ensure that there are not more swap files than can be encoded in the kernel - * the PTEs. + * PTEs. */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) @@ -308,13 +406,13 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; * Encode and decode a file entry: * bits 0-1: present (must be zero) * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 3-57: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) #define pte_to_pgoff(x) (pte_val(x) >> 3) #define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 55 extern int kern_addr_valid(unsigned long addr); diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 7cdf466fd0c5..0c657bb54597 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -26,11 +26,14 @@ #include <asm/page.h> struct mm_struct; +struct cpu_suspend_ctx; extern void cpu_cache_off(void); extern void cpu_do_idle(void); extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); +extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); +extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include <asm/memory.h> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ab239b2c456f..45b20cd6cbca 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -107,6 +107,11 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate = COMPAT_PSR_MODE_USR; if (pc & 1) regs->pstate |= COMPAT_PSR_T_BIT; + +#ifdef __AARCH64EB__ + regs->pstate |= COMPAT_PSR_E_BIT; +#endif + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index 0604237ecd99..9a4b663670ff 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,25 +14,10 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 +struct cpuidle_driver; +void psci_init(void); -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); -}; - -extern struct psci_operations psci_ops; - -int psci_init(void); +int __init psci_dt_register_idle_states(struct cpuidle_driver *, + struct device_node *[]); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..a429b5940be2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -42,6 +42,7 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 +#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 @@ -67,6 +68,7 @@ /* Architecturally defined mapping between AArch32 and AArch64 registers */ #define compat_usr(x) regs[(x)] +#define compat_fp regs[11] #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] @@ -131,7 +133,12 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - ((regs)->sp) + (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[0]; +} /* * Are the current registers suitable for user mode? (used to maintain @@ -163,7 +170,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) return 0; } -#define instruction_pointer(regs) (regs)->pc +#define instruction_pointer(regs) ((unsigned long)(regs)->pc) #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); @@ -171,7 +178,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h deleted file mode 100644 index dca1094acc74..000000000000 --- a/arch/arm64/include/asm/sigcontext.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_SIGCONTEXT_H -#define __ASM_SIGCONTEXT_H - -#include <uapi/asm/sigcontext.h> - -/* - * Auxiliary context saved in the sigcontext.__reserved array. Not exported to - * user space as it will change with the addition of new context. User space - * should check the magic/size information. - */ -struct aux_context { - struct fpsimd_context fpsimd; - /* additional context to be added before "end" */ - struct _aarch64_ctx end; -}; -#endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 4b8023c5d146..a498f2cd2c2a 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -60,21 +60,14 @@ struct secondary_data { void *stack; }; extern struct secondary_data secondary_data; -extern void secondary_holding_pen(void); -extern volatile unsigned long secondary_holding_pen_release; +extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -struct device_node; +extern int __cpu_disable(void); -struct smp_enable_ops { - const char *name; - int (*init_cpu)(struct device_node *, int); - int (*prepare_cpu)(int); -}; - -extern const struct smp_enable_ops smp_spin_table_ops; -extern const struct smp_enable_ops smp_psci_ops; +extern void __cpu_die(unsigned int cpu); +extern void cpu_die(void); #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index ed43a0d2b1b2..59e282311b58 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -21,6 +21,19 @@ #include <asm/types.h> +struct mpidr_hash { + u64 mask; + u32 shift_aff[4]; + u32 bits; +}; + +extern struct mpidr_hash mpidr_hash; + +static inline u32 mpidr_hash_size(void) +{ + return 1 << mpidr_hash.bits; +} + /* * Logical CPU mapping. */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0defa0728a9b..c45b7b1b7197 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -22,17 +22,10 @@ /* * Spinlock implementation. * - * The old value is read exclusively and the new one, if unlocked, is written - * exclusively. In case of failure, the loop is restarted. - * * The memory barriers are implicit with the load-acquire and store-release * instructions. - * - * Unlocked value: 0 - * Locked value: 1 */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -41,32 +34,51 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval, newval; asm volatile( - " sevl\n" - "1: wfe\n" - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1b\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); + /* Atomically increment the next ticket. */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, %w5\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n" + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) + : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval; asm volatile( - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - "1:\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); +" prfm pstl1strm, %2\n" +"1: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 2f\n" +" add %w0, %w0, %3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b\n" +"2:" + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : "I" (1 << TICKET_SHIFT) + : "memory"); return !tmp; } @@ -74,9 +86,28 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { asm volatile( - " stlr %w1, %0\n" - : "=Q" (lock->lock) : "r" (0) : "memory"); +" stlrh %w1, %0\n" + : "=Q" (lock->owner) + : "r" (lock->owner + 1) + : "memory"); +} + +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner == lock.next; +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return (lockval.next - lockval.owner) > 1; } +#define arch_spin_is_contended arch_spin_is_contended /* * Write lock implementation. @@ -101,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 2b\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); } static inline int arch_write_trylock(arch_rwlock_t *rw) @@ -115,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); return !tmp; } @@ -156,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " cbnz %w1, 2b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -170,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) @@ -185,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); return !tmp2; } diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 9a494346efed..87692750ed94 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,14 +20,14 @@ # error "please don't include this file directly" #endif -/* We only require natural alignment for exclusive accesses. */ -#define __lock_aligned +#define TICKET_SHIFT 16 typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; + u16 owner; + u16 next; +} __aligned(4) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h new file mode 100644 index 000000000000..e9c149c042e0 --- /dev/null +++ b/arch/arm64/include/asm/suspend.h @@ -0,0 +1,27 @@ +#ifndef __ASM_SUSPEND_H +#define __ASM_SUSPEND_H + +#define NR_CTX_REGS 11 + +/* + * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on + * the stack, which must be 16-byte aligned on v8 + */ +struct cpu_suspend_ctx { + /* + * This struct must be kept in sync with + * cpu_do_{suspend/resume} in mm/proc.S + */ + u64 ctx_regs[NR_CTX_REGS]; + u64 sp; +} __aligned(16); + +struct sleep_save_sp { + phys_addr_t *save_ptr_stash; + phys_addr_t save_ptr_stash_phys; +}; + +extern void cpu_resume(void); +extern int cpu_suspend(unsigned long); + +#endif diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 70ba9d4ee978..383771eb0b87 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -18,6 +18,7 @@ #include <linux/err.h> +extern const void *sys_call_table[]; static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 23a3c4791d86..59f151f8241d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -97,6 +97,9 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags: * TIF_SYSCALL_TRACE - syscall trace active + * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace + * TIF_SYSCALL_AUDIT - syscall auditing + * TIF_SECOMP - syscall secure computing * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -107,6 +110,9 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_SYSCALL_TRACE 8 +#define TIF_SYSCALL_AUDIT 9 +#define TIF_SYSCALL_TRACEPOINT 10 +#define TIF_SECCOMP 11 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 @@ -118,10 +124,17 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index b24a31a7e2c9..81a076eb37fa 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,14 +16,14 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H +#include <asm/arch_timer.h> + /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) +#define get_cycles() arch_counter_get_cntvct() #include <asm-generic/timex.h> -#define ARCH_HAS_READ_CURRENT_TIMER - #endif diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 5546653e5cc8..717031a762c2 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -190,4 +190,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..3083a08f9622 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb; */ static inline void flush_tlb_all(void) { - dsb(); + dsb(ishst); asm("tlbi vmalle1is"); - dsb(); + dsb(ish); isb(); } @@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm) { unsigned long asid = (unsigned long)ASID(mm) << 48; - dsb(); + dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); - dsb(); + dsb(ish); } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -93,9 +93,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(vma->vm_mm) << 48); - dsb(); + dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); - dsb(); + dsb(ish); } /* @@ -114,9 +114,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * set_pte() does not have a DSB, so make sure that the page table * write is visible. */ - dsb(); + dsb(ishst); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h new file mode 100644 index 000000000000..e0171b393a14 --- /dev/null +++ b/arch/arm64/include/asm/topology.h @@ -0,0 +1,70 @@ +#ifndef __ASM_TOPOLOGY_H +#define __ASM_TOPOLOGY_H + +#ifdef CONFIG_SMP + +#include <linux/cpumask.h> + +struct cpu_topology { + int thread_id; + int core_id; + int cluster_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cpu_topology cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +#define mc_capable() (cpu_topology[0].cluster_id != -1) +#define smt_capable() (cpu_topology[0].thread_id != -1) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); + +#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + \ + .flags = 0*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 1*SD_WAKE_AFFINE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ +} +#endif +#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */ + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + +#include <asm-generic/topology.h> + +#endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 008f8481da65..3bf8f4e99a51 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -83,7 +83,7 @@ static inline void set_fs(mm_segment_t fs) * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size < (u65)current->addr_limit + * (u65)addr + (u65)size <= current->addr_limit * * This needs 65-bit arithmetic. */ @@ -91,7 +91,7 @@ static inline void set_fs(mm_segment_t fs) ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \ + asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ : "=&r" (flag), "=&r" (roksum) \ : "1" (addr), "Ir" (size), \ "r" (current_thread_info()->addr_limit) \ @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) }) #define access_ok(type, addr, size) __range_ok(addr, size) +#define user_addr_max get_fs /* * The "__xxx" versions of the user access functions do not verify the address @@ -166,9 +167,10 @@ do { \ #define get_user(x, ptr) \ ({ \ - might_sleep(); \ - access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \ - __get_user((x), (ptr)) : \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ + __get_user((x), __p) : \ ((x) = 0, -EFAULT); \ }) @@ -227,9 +229,10 @@ do { \ #define put_user(x, ptr) \ ({ \ - might_sleep(); \ - access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ - __put_user((x), (ptr)) : \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ + __put_user((x), __p) : \ -EFAULT; \ }) @@ -238,9 +241,6 @@ extern unsigned long __must_check __copy_to_user(void __user *to, const void *fr extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -274,24 +274,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} - -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) +extern long strncpy_from_user(char *dest, const char __user *src, long count); -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 82ce217e94cf..c335479c2638 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -28,3 +28,5 @@ #endif #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> + +#define NR_syscalls (__NR_syscalls) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 26e310c54344..215ad4649dd7 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,10 +18,10 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H -#define BOOT_CPU_MODE_EL2 (0x0e12b007) +#define BOOT_CPU_MODE_EL1 (0xe11) +#define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ -#include <asm/cacheflush.h> /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -37,20 +37,9 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); phys_addr_t __hyp_get_vectors(void); -static inline void sync_boot_mode(void) -{ - /* - * As secondaries write to __boot_cpu_mode with caches disabled, we - * must flush the corresponding cache entries to ensure the visibility - * of their writes. - */ - __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); -} - /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { - sync_boot_mode(); return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -58,7 +47,6 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { - sync_boot_mode(); return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..aab5bf09e9d9 --- /dev/null +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_WORD_AT_A_TIME_H +#define __ASM_WORD_AT_A_TIME_H + +#ifndef __AARCH64EB__ + +#include <linux/kernel.h> + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return fls64(mask) >> 3; +} + +#define zero_bytemask(mask) (mask) + +#else /* __AARCH64EB__ */ +#include <asm-generic/word-at-a-time.h> +#endif + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, %3\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x7\n" + " bic %2, %2, #0x7\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" +#ifndef __AARCH64EB__ + " lsr %0, %0, %1\n" +#else + " lsl %0, %0, %1\n" +#endif + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .quad 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Q" (*(unsigned long *)addr)); + + return ret; +} + +#endif /* __ASM_WORD_AT_A_TIME_H */ diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index e4b78bdca19e..942376d37d22 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -9,6 +9,7 @@ header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h header-y += kvm_para.h +header-y += perf_regs.h header-y += param.h header-y += ptrace.h header-y += setup.h diff --git a/arch/arm64/include/uapi/asm/byteorder.h b/arch/arm64/include/uapi/asm/byteorder.h index 2b92046aafc5..dc19e9537f0d 100644 --- a/arch/arm64/include/uapi/asm/byteorder.h +++ b/arch/arm64/include/uapi/asm/byteorder.h @@ -16,6 +16,10 @@ #ifndef __ASM_BYTEORDER_H #define __ASM_BYTEORDER_H +#ifdef __AARCH64EB__ +#include <linux/byteorder/big_endian.h> +#else #include <linux/byteorder/little_endian.h> +#endif #endif /* __ASM_BYTEORDER_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index eea497578b87..73cf0f54d57c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -21,6 +21,11 @@ */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) - +#define HWCAP_EVTSTRM (1 << 2) +#define HWCAP_AES (1 << 3) +#define HWCAP_PMULL (1 << 4) +#define HWCAP_SHA1 (1 << 5) +#define HWCAP_SHA2 (1 << 6) +#define HWCAP_CRC32 (1 << 7) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..172b8317ee49 --- /dev/null +++ b/arch/arm64/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +#ifndef _ASM_ARM64_PERF_REGS_H +#define _ASM_ARM64_PERF_REGS_H + +enum perf_event_arm_regs { + PERF_REG_ARM64_X0, + PERF_REG_ARM64_X1, + PERF_REG_ARM64_X2, + PERF_REG_ARM64_X3, + PERF_REG_ARM64_X4, + PERF_REG_ARM64_X5, + PERF_REG_ARM64_X6, + PERF_REG_ARM64_X7, + PERF_REG_ARM64_X8, + PERF_REG_ARM64_X9, + PERF_REG_ARM64_X10, + PERF_REG_ARM64_X11, + PERF_REG_ARM64_X12, + PERF_REG_ARM64_X13, + PERF_REG_ARM64_X14, + PERF_REG_ARM64_X15, + PERF_REG_ARM64_X16, + PERF_REG_ARM64_X17, + PERF_REG_ARM64_X18, + PERF_REG_ARM64_X19, + PERF_REG_ARM64_X20, + PERF_REG_ARM64_X21, + PERF_REG_ARM64_X22, + PERF_REG_ARM64_X23, + PERF_REG_ARM64_X24, + PERF_REG_ARM64_X25, + PERF_REG_ARM64_X26, + PERF_REG_ARM64_X27, + PERF_REG_ARM64_X28, + PERF_REG_ARM64_X29, + PERF_REG_ARM64_LR, + PERF_REG_ARM64_SP, + PERF_REG_ARM64_PC, + PERF_REG_ARM64_MAX, +}; +#endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..6a81721f3382 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -5,19 +5,30 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg + # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o +arm64-obj-$(CONFIG_SMP) += topology.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f626d554..7f0512feaa13 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,16 +29,14 @@ #include <asm/checksum.h> - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); @@ -58,3 +56,7 @@ EXPORT_SYMBOL(clear_bit); EXPORT_SYMBOL(test_and_clear_bit); EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea3..c481a119b98a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,6 +24,8 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> @@ -104,5 +106,47 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); + DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); + DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); + DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 000000000000..04efea8fe4bc --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,99 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#endif + NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4e..fd3993cb060f 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..7f66fe150265 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/ptrace.h> #include <linux/stat.h> +#include <linux/uaccess.h> #include <asm/debug-monitors.h> #include <asm/local.h> @@ -137,7 +138,6 @@ void disable_debug_monitors(enum debug_el el) static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); - isb(); } static int __cpuinit os_lock_notify(struct notifier_block *self, @@ -156,8 +156,9 @@ static struct notifier_block __cpuinitdata os_lock_nb = { static int __cpuinit debug_monitors_init(void) { /* Clear the OS lock. */ - smp_call_function(clear_os_lock, NULL, 1); - clear_os_lock(NULL); + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); /* Register hotplug handler. */ register_cpu_notifier(&os_lock_nb); @@ -187,6 +188,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) regs->pstate = spsr; } +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +static DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_add(&hook->node, &step_hook); + write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_del(&hook->node); + write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ + struct step_hook *hook; + int retval = DBG_HOOK_ERROR; + + read_lock(&step_hook_lock); + + list_for_each_entry(hook, &step_hook, node) { + retval = hook->fn(regs, esr); + if (retval == DBG_HOOK_HANDLED) + break; + } + + read_unlock(&step_hook_lock); + + return retval; +} + static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -214,7 +257,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { - /* TODO: route to KGDB */ + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + pr_warning("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be @@ -226,13 +271,113 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +static DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_add(&hook->node, &break_hook); + write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_del(&hook->node); + write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ + struct break_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + + read_lock(&break_hook_lock); + list_for_each_entry(hook, &break_hook, node) + if ((esr & hook->esr_mask) == hook->esr_val) + fn = hook->fn; + read_unlock(&break_hook_lock); + + return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index fbb6e1843659..ffbbdde7aba1 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -26,6 +26,8 @@ #include <linux/amba/serial.h> #include <linux/serial_reg.h> +#include <asm/fixmap.h> + static void __iomem *early_base; static void (*printch)(char ch); @@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf) } /* no options parsing yet */ - if (paddr) - early_base = early_io_map(paddr, EARLYCON_IOBASE); + if (paddr) { + set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); + early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); + } printch = match->printch; early_console = &early_console_dev; diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 000000000000..b051871f2965 --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + * mov x0, x30 + * bl _mcount + * [function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp => 0:+-----+ + * in _mcount() | x29 | -> instrumented function's fp + * +-----+ + * | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp => +16:+-----+ + * when instrumented | | + * function calls | ... | + * _mcount() | | + * | | + * instrumented => +xx:+-----+ + * function's fp | x29 | -> parent's fp + * +-----+ + * | x30 | -> instrumented function's lr (= parent's pc) + * +-----+ + * | ... | + */ + + .macro mcount_enter + stp x29, x30, [sp, #-16]! + mov x29, sp + .endm + + .macro mcount_exit + ldp x29, x30, [sp], #16 + ret + .endm + + .macro mcount_adjust_addr rd, rn + sub \rd, \rn, #AARCH64_INSN_SIZE + .endm + + /* for instrumented function's parent */ + .macro mcount_get_parent_fp reg + ldr \reg, [x29] + ldr \reg, [\reg] + .endm + + /* for instrumented function */ + .macro mcount_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + + .macro mcount_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr reg + ldr \reg, [x29] + ldr \reg, [\reg, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr_addr reg + ldr \reg, [x29] + add \reg, \reg, #8 + .endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ldr x0, =ftrace_trace_stop + ldr x0, [x0] // if ftrace_trace_stop + ret // return; +#endif + mcount_enter + + ldr x0, =ftrace_trace_function + ldr x2, [x0] + adr x0, ftrace_stub + cmp x0, x2 // if (ftrace_trace_function + b.eq skip_ftrace_call // != ftrace_stub) { + + mcount_get_pc x0 // function's pc + mcount_get_lr x1 // function's lr (= parent's pc) + blr x2 // (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call: // return; + mcount_exit // } +#else + mcount_exit // return; + // } +skip_ftrace_call: + ldr x1, =ftrace_graph_return + ldr x2, [x1] // if ((ftrace_graph_return + cmp x0, x2 // != ftrace_stub) + b.ne ftrace_graph_caller + + ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry + ldr x2, [x1] // != ftrace_graph_entry_stub)) + ldr x0, =ftrace_graph_entry_stub + cmp x0, x2 + b.ne ftrace_graph_caller // ftrace_graph_caller(); + + mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) + ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) + mcount_enter + + mcount_get_pc0 x0 // function's pc + mcount_get_lr x1 // function's lr + + .global ftrace_call +ftrace_call: // tracer(pc, lr); + nop // This will be replaced with "bl xxx" + // where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) + mcount_get_lr_addr x0 // pointer to function's saved lr + mcount_get_pc x1 // function's pc + mcount_get_parent_fp x2 // parent's fp + bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp) + + mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) + str x0, [sp, #-16]! + mov x0, x29 // parent's fp + bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); + mov x30, x0 // restore the original return address + ldr x0, [sp], #16 + ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad781b21c08..fa789169f98b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -288,6 +288,8 @@ el1_dbg: /* * Debug exception handling */ + cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 mov x2, sp // struct pt_regs @@ -311,14 +313,14 @@ el1_irq: #endif #ifdef CONFIG_PREEMPT get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -423,6 +425,7 @@ el0_da: * Data abort handling */ mrs x0, far_el1 + bic x0, x0, #(0xff << 56) disable_step x1 isb enable_dbg @@ -476,6 +479,8 @@ el0_undef: * Undefined instruction */ mov x0, sp + // enable interrupts before calling the main handler + enable_irq b do_undefinstr el0_dbg: /* @@ -506,15 +511,15 @@ el0_irq_naked: #endif get_thread_info tsk #ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 b.eq 1f mov x1, #0 str x1, [x1] // BUG @@ -641,8 +646,9 @@ el0_svc_naked: // compat entry point enable_irq get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys @@ -658,9 +664,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -675,9 +680,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fa308e4a1fa..522df9c7f3a4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> @@ -85,6 +86,66 @@ void fpsimd_flush_thread(void) preempt_enable(); } +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin(void) +{ + /* Avoid using the NEON in interrupt context */ + BUG_ON(in_interrupt()); + preempt_disable(); + + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); +} +EXPORT_SYMBOL(kernel_neon_begin); + +void kernel_neon_end(void) +{ + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + + preempt_enable(); +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + /* * FP/SIMD support code initialisation. */ @@ -103,6 +164,8 @@ static int __init fpsimd_init(void) else elf_hwcap |= HWCAP_ASIMD; + fpsimd_pm_init(); + return 0; } late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 000000000000..649890a3ac4e --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,177 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). + */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..2a794b6e3e76 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -112,11 +112,20 @@ .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 + .word 0 // reserved ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -142,21 +151,30 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 - b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -173,7 +191,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -196,10 +215,25 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + ret +ENDPROC(set_cpu_boot_mode_flag) + +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * @@ -217,7 +251,6 @@ ENTRY(__boot_cpu_mode) .quad PAGE_OFFSET #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -227,8 +260,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 @@ -242,7 +276,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* @@ -340,26 +383,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -368,7 +403,7 @@ ENDPROC(__calc_phys_offset) * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has * been enabled, including the FDT blob (TTBR1) - * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses @@ -391,9 +426,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -401,7 +440,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -421,15 +460,12 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: -#ifdef CONFIG_EARLY_PRINTK /* - * Create the pgd entry for the UART mapping. The full mapping is done - * later based earlyprintk kernel parameter. + * Create the pgd entry for the fixed mappings. */ - ldr x5, =EARLYCON_IOBASE // UART virtual address + ldr x5, =FIXADDR_TOP // Fixed mapping virtual address add x0, x26, #2 * PAGE_SIZE // section table address create_pgd_entry x26, x0, x5, x6, x7 -#endif ret ENDPROC(__create_page_tables) .ltorg @@ -438,8 +474,6 @@ ENDPROC(__create_page_tables) .type __switch_data, %object __switch_data: .quad __mmap_switched - .quad __data_loc // x4 - .quad _data // x5 .quad __bss_start // x6 .quad _end // x7 .quad processor_id // x4 @@ -454,15 +488,7 @@ __switch_data: __mmap_switched: adr x3, __switch_data + 8 - ldp x4, x5, [x3], #16 ldp x6, x7, [x3], #16 - cmp x4, x5 // Copy data segment if needed -1: ccmp x5, x6, #4, ne - b.eq 2f - ldr x16, [x4], #8 - str x16, [x5], #8 - b 1b -2: 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 5ab825c59db9..6de3460ede4c 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,14 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -169,15 +170,68 @@ static enum debug_el debug_exception_level(int privilege) } } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { + HW_BREAKPOINT_INSTALL, + HW_BREAKPOINT_UNINSTALL, + HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + * operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + * slot index on success + * -ENOSPC if no slot is available/matches + * -EINVAL on wrong operations parameter */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, + struct perf_event *bp, + enum hw_breakpoint_ops ops) +{ + int i; + struct perf_event **slot; + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + if (!*slot) { + *slot = bp; + return i; + } + break; + case HW_BREAKPOINT_UNINSTALL: + if (*slot == bp) { + *slot = NULL; + return i; + } + break; + case HW_BREAKPOINT_RESTORE: + if (*slot == bp) + return i; + break; + default: + pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); + return -EINVAL; + } + } + return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, + enum hw_breakpoint_ops ops) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; + struct perf_event **slots; struct debug_info *debug_info = ¤t->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; + enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { @@ -196,67 +250,54 @@ int arch_install_hw_breakpoint(struct perf_event *bp) reg_enable = !debug_info->wps_disabled; } - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return -ENOSPC; + i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); - /* Ensure debug monitors are enabled at the correct exception level. */ - enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) + return i; - /* Setup the address register. */ - write_wb_reg(val_reg, i, info->address); + switch (ops) { + case HW_BREAKPOINT_INSTALL: + /* + * Ensure debug monitors are enabled at the correct exception + * level. + */ + enable_debug_monitors(dbg_el); + /* Fall through */ + case HW_BREAKPOINT_RESTORE: + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, + reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + break; + case HW_BREAKPOINT_UNINSTALL: + /* Reset the control register. */ + write_wb_reg(ctrl_reg, i, 0); - /* Setup the control register. */ - ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + /* + * Release the debug monitors for the correct exception + * level. + */ + disable_debug_monitors(dbg_el); + break; + } return 0; } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; - int i, max_slots, base; - - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - /* Breakpoint */ - base = AARCH64_DBG_REG_BCR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps; - } else { - /* Watchpoint */ - base = AARCH64_DBG_REG_WCR; - slots = __get_cpu_var(wp_on_reg); - max_slots = core_num_wrps; - } - - /* Remove the breakpoint. */ - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return; - - /* Reset the control register. */ - write_wb_reg(base, i, 0); + return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} - /* Release the debug monitors for the correct exception level. */ - disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); } static int get_hbp_len(u8 hbp_len) @@ -806,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused) { int i; - - for (i = 0; i < core_num_brps; ++i) { - write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + struct perf_event **slots; + /* + * When a CPU goes through cold-boot, it does not have any installed + * slot, so it is safe to share the same function for restoring and + * resetting breakpoints; when a CPU is hotplugged in, it goes + * through the slots, which are all empty, hence it just resets control + * and value for debug registers. + * When this function is triggered on warm-boot through a CPU PM + * notifier some slots might be initialized; if so they are + * reprogrammed according to the debug slots content. + */ + for (slots = __get_cpu_var(bp_on_reg), i = 0; i < core_num_brps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } } - for (i = 0; i < core_num_wrps; ++i) { - write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + for (slots = __get_cpu_var(wp_on_reg), i = 0; i < core_num_wrps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } } } @@ -827,7 +886,7 @@ static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, { int cpu = (long)hcpu; if (action == CPU_ONLINE) - smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } @@ -835,6 +894,14 @@ static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif + /* * One-time initialisation. */ @@ -850,8 +917,8 @@ static int __init arch_hw_breakpoint_init(void) * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + smp_call_function(hw_breakpoint_reset, NULL, 1); + hw_breakpoint_reset(NULL); /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -861,6 +928,8 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&hw_breakpoint_reset_nb); + /* Register cpu_suspend hw breakpoint restore hook */ + cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); return 0; } diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 000000000000..92f36835486b --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ + return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_YIELD: + case AARCH64_INSN_HINT_WFE: + case AARCH64_INSN_HINT_WFI: + case AARCH64_INSN_HINT_SEV: + case AARCH64_INSN_HINT_SEVL: + return false; + default: + return true; + } +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ + insn = cpu_to_le32(insn); + return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) + return false; + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_svc(insn) || + aarch64_insn_is_hvc(insn) || + aarch64_insn_is_smc(insn) || + aarch64_insn_is_brk(insn) || + aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. + */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ + return __aarch64_insn_hotpatch_safe(old_insn) && + __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + /* A64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + + ret = aarch64_insn_write(tp, insn); + if (ret == 0) + flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); + + return ret; +} + +struct aarch64_insn_patch { + void **text_addrs; + u32 *new_insns; + int insn_cnt; + atomic_t cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ + int i, ret = 0; + struct aarch64_insn_patch *pp = arg; + + /* The first CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == 1) { + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], + pp->new_insns[i]); + /* + * aarch64_insn_patch_text_nosync() calls flush_icache_range(), + * which ends with "dsb; isb" pair guaranteeing global + * visibility. + */ + atomic_set(&pp->cpu_count, -1); + } else { + while (atomic_read(&pp->cpu_count) != -1) + cpu_relax(); + isb(); + } + + return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ + struct aarch64_insn_patch patch = { + .text_addrs = addrs, + .new_insns = insns, + .insn_cnt = cnt, + .cpu_count = ATOMIC_INIT(0), + }; + + if (cnt <= 0) + return -EINVAL; + + return stop_machine(aarch64_insn_patch_text_cb, &patch, + cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ + int ret; + u32 insn; + + /* Unsafe to patch multiple instructions without synchronizaiton */ + if (cnt == 1) { + ret = aarch64_insn_read(addrs[0], &insn); + if (ret) + return ret; + + if (aarch64_insn_hotpatch_safe(insn, insns[0])) { + /* + * ARMv8 architecture doesn't guarantee all CPUs see + * the new instruction after returning from function + * aarch64_insn_patch_text_nosync(). So send IPIs to + * all other CPUs to achieve instruction + * synchronization. + */ + ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); + kick_all_cpus_sync(); + return ret; + } + } + + return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. */ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type) +{ + u32 insn; + long offset; + + /* + * PC: A 64-bit Program Counter holding the address of the current + * instruction. A64 instructions must be word-aligned. + */ + BUG_ON((pc & 0x3) || (addr & 0x3)); + + /* + * B/BL support [-128M, 128M) offset + * ARM64 virtual address arrangement guarantees all kernel and module + * texts are within +/-128M. + */ + offset = ((long)addr - (long)pc); + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + + if (type == AARCH64_INSN_BRANCH_LINK) + insn = aarch64_insn_get_bl_value(); + else + insn = aarch64_insn_get_b_value(); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354292ed..473e5dbf8f39 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,64 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 000000000000..263a166291fb --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_ENABLE) { + insn = aarch64_insn_gen_branch_imm(entry->code, + entry->target, + AARCH64_INSN_BRANCH_NOLINK); + } else { + insn = aarch64_insn_gen_nop(); + } + + if (is_static) + aarch64_insn_patch_text_nosync(addr, insn); + else + aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 000000000000..75c9cf1aafee --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <asm/traps.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "x0", 8, offsetof(struct pt_regs, regs[0])}, + { "x1", 8, offsetof(struct pt_regs, regs[1])}, + { "x2", 8, offsetof(struct pt_regs, regs[2])}, + { "x3", 8, offsetof(struct pt_regs, regs[3])}, + { "x4", 8, offsetof(struct pt_regs, regs[4])}, + { "x5", 8, offsetof(struct pt_regs, regs[5])}, + { "x6", 8, offsetof(struct pt_regs, regs[6])}, + { "x7", 8, offsetof(struct pt_regs, regs[7])}, + { "x8", 8, offsetof(struct pt_regs, regs[8])}, + { "x9", 8, offsetof(struct pt_regs, regs[9])}, + { "x10", 8, offsetof(struct pt_regs, regs[10])}, + { "x11", 8, offsetof(struct pt_regs, regs[11])}, + { "x12", 8, offsetof(struct pt_regs, regs[12])}, + { "x13", 8, offsetof(struct pt_regs, regs[13])}, + { "x14", 8, offsetof(struct pt_regs, regs[14])}, + { "x15", 8, offsetof(struct pt_regs, regs[15])}, + { "x16", 8, offsetof(struct pt_regs, regs[16])}, + { "x17", 8, offsetof(struct pt_regs, regs[17])}, + { "x18", 8, offsetof(struct pt_regs, regs[18])}, + { "x19", 8, offsetof(struct pt_regs, regs[19])}, + { "x20", 8, offsetof(struct pt_regs, regs[20])}, + { "x21", 8, offsetof(struct pt_regs, regs[21])}, + { "x22", 8, offsetof(struct pt_regs, regs[22])}, + { "x23", 8, offsetof(struct pt_regs, regs[23])}, + { "x24", 8, offsetof(struct pt_regs, regs[24])}, + { "x25", 8, offsetof(struct pt_regs, regs[25])}, + { "x26", 8, offsetof(struct pt_regs, regs[26])}, + { "x27", 8, offsetof(struct pt_regs, regs[27])}, + { "x28", 8, offsetof(struct pt_regs, regs[28])}, + { "x29", 8, offsetof(struct pt_regs, regs[29])}, + { "x30", 8, offsetof(struct pt_regs, regs[30])}, + { "sp", 8, offsetof(struct pt_regs, sp)}, + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "pstate", 8, offsetof(struct pt_regs, pstate)}, + { "v0", 16, -1 }, + { "v1", 16, -1 }, + { "v2", 16, -1 }, + { "v3", 16, -1 }, + { "v4", 16, -1 }, + { "v5", 16, -1 }, + { "v6", 16, -1 }, + { "v7", 16, -1 }, + { "v8", 16, -1 }, + { "v9", 16, -1 }, + { "v10", 16, -1 }, + { "v11", 16, -1 }, + { "v12", 16, -1 }, + { "v13", 16, -1 }, + { "v14", 16, -1 }, + { "v15", 16, -1 }, + { "v16", 16, -1 }, + { "v17", 16, -1 }, + { "v18", 16, -1 }, + { "v19", 16, -1 }, + { "v20", 16, -1 }, + { "v21", 16, -1 }, + { "v22", 16, -1 }, + { "v23", 16, -1 }, + { "v24", 16, -1 }, + { "v25", 16, -1 }, + { "v26", 16, -1 }, + { "v27", 16, -1 }, + { "v28", 16, -1 }, + { "v29", 16, -1 }, + { "v30", 16, -1 }, + { "v31", 16, -1 }, + { "fpsr", 4, -1 }, + { "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + struct pt_regs *thread_regs; + + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + thread_regs = task_pt_regs(task); + memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static int compiled_break; + +static void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(regs, regs->pc + 4); + + compiled_break = 0; +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + int err; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + /* + * Packet D (Detach), k (kill). No special handling + * is required here. Handle same as c packet. + */ + case 'c': + /* + * Packet c (Continue) to continue executing. + * Set pc to required address. + * Try to read optional parameter and set pc. + * If this was a compiled breakpoint, we need to move + * to the next instruction else we will just breakpoint + * over and over again. + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, -1); + kgdb_single_step = 0; + + /* + * Received continue command, disable single step + */ + if (kernel_active_single_step()) + kernel_disable_single_step(); + + err = 0; + break; + case 's': + /* + * Update step address value with address passed + * with step packet. + * On debug exception return PC is copied to ELR + * So just update PC. + * If no step address is passed, resume from the address + * pointed by PC. Do not update PC + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + kgdb_single_step = 1; + + /* + * Enable single step handling + */ + if (!kernel_active_single_step()) + kernel_enable_single_step(linux_regs); + err = 0; + break; + default: + err = -1; + } + return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static struct break_hook kgdb_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), + .fn = kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), + .fn = kgdb_compiled_brk_fn +}; + +static struct step_hook kgdb_step_hook = { + .fn = kgdb_step_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + /* + * Want to be lowest priority + */ + .priority = -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + + register_break_hook(&kgdb_brkpt_hook); + register_break_hook(&kgdb_compiled_brkpt_hook); + register_step_hook(&kgdb_step_hook); + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_break_hook(&kgdb_brkpt_hook); + unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_step_hook(&kgdb_step_hook); + unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { + KGDB_DYN_BRK_INS_BYTE0, + KGDB_DYN_BRK_INS_BYTE1, + KGDB_DYN_BRK_INS_BYTE2, + KGDB_DYN_BRK_INS_BYTE3, + } +}; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..63c48ffdf230 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -35,33 +38,30 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] + .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy - .inst 0xe1923f9f // 1: ldrex r3, [r2] + .inst 0xe1923e9f // 1: ldaex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 @@ -75,3 +75,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da99..df08a6e0287d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,6 +25,10 @@ #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> +#include <asm/insn.h> + +#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX +#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 void *module_alloc(unsigned long size) { @@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) return 0; } -enum aarch64_imm_type { - INSN_IMM_MOVNZ, - INSN_IMM_MOVK, - INSN_IMM_ADR, - INSN_IMM_26, - INSN_IMM_19, - INSN_IMM_16, - INSN_IMM_14, - INSN_IMM_12, - INSN_IMM_9, -}; - -static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_insn_imm_type imm_type) { - u32 immlo, immhi, lomask, himask, mask; - int shift; + u64 imm, limit = 0; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; - switch (type) { - case INSN_IMM_MOVNZ: + if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* * For signed MOVW relocations, we have to manipulate the * instruction encoding depending on whether or not the @@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) */ imm = ~imm; } - case INSN_IMM_MOVK: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; - case INSN_IMM_26: - mask = BIT(26) - 1; - shift = 0; - break; - case INSN_IMM_19: - mask = BIT(19) - 1; - shift = 5; - break; - case INSN_IMM_16: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_14: - mask = BIT(14) - 1; - shift = 5; - break; - case INSN_IMM_12: - mask = BIT(12) - 1; - shift = 10; - break; - case INSN_IMM_9: - mask = BIT(9) - 1; - shift = 12; - break; - default: - pr_err("encode_insn_immediate: unknown immediate encoding %d\n", - type); - return 0; + imm_type = AARCH64_INSN_IMM_MOVK; } - /* Update the immediate field. */ - insn &= ~(mask << shift); - insn |= (imm & mask) << shift; - - return insn; -} - -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_imm_type imm_type) -{ - u64 imm, limit = 0; - s64 sval; - u32 insn = *(u32 *)place; - - sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; - /* Update the instruction with the new encoding. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* Shift out the immediate field. */ sval >>= 16; @@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * For unsigned immediates, the overflow check is straightforward. * For signed immediates, the sign bit is actually the bit past the * most significant bit of the field. - * The INSN_IMM_16 immediate type is unsigned. + * The AARCH64_INSN_IMM_16 immediate type is unsigned. */ - if (imm_type != INSN_IMM_16) { + if (imm_type != AARCH64_INSN_IMM_16) { sval++; limit++; } @@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, int len, enum aarch64_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = *(u32 *)place; + u32 insn = le32_to_cpu(*(u32 *)place); /* Calculate the relocation value. */ sval = do_reloc(op, place, val); @@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, imm = sval & imm_mask; /* Update the instruction's immediate field. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and @@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; /* Immediate instruction relocations. */ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - INSN_IMM_14); + AARCH64_INSN_IMM_14); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - INSN_IMM_26); + AARCH64_INSN_IMM_26); break; default: diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594ff933..dfcd8fadde3c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@ #include <linux/bitmap.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/perf_event.h> @@ -363,26 +364,53 @@ validate_group(struct perf_event *event) } static void +armpmu_disable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + disable_percpu_irq(irq); +} + +static void armpmu_release_hardware(struct arm_pmu *armpmu) { - int i, irq, irqs; + int irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (!irqs) + return; - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) + return; + + if (irq_is_percpu(irq)) { + on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &cpu_hw_events); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq > 0) + free_irq(irq, armpmu); + } } } +static void +armpmu_enable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int i, err, irq, irqs; + int err, irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; if (!pmu_device) { @@ -391,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (!irqs) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) { + pr_err("failed to get valid irq for PMU device\n"); + return -ENODEV; + } - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, armpmu->handle_irq, + "arm-pmu", &cpu_hw_events); - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, - "arm-pmu", armpmu); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); + pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", + irq); armpmu_release_hardware(armpmu); return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq <= 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } } return 0; @@ -1299,8 +1347,8 @@ early_initcall(init_hw_perf_events); * Callchain handling code. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long lr; } __attribute__((packed)); /* @@ -1337,22 +1385,84 @@ user_backtrace(struct frame_tail __user *tail, return buftail.fp; } +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } perf_callchain_store(entry, regs->pc); - tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); +#endif + } } /* @@ -1380,6 +1490,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 000000000000..422ebd63b619 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + return 0; + + /* + * Compat (i.e. 32 bit) mode: + * - PC has been set in the pt_regs struct in kernel_entry, + * - Handle SP and LR here. + */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; + } + + return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (is_compat_thread(task_thread_info(task))) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..3193bf35dbc8 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include <stdarg.h> +#include <linux/compat.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -33,6 +34,7 @@ #include <linux/kallsyms.h> #include <linux/init.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -71,8 +73,17 @@ static void setup_restart(void) void soft_restart(unsigned long addr) { + typedef void (*phys_reset_t)(unsigned long); + phys_reset_t phys_reset; + setup_restart(); - cpu_reset(addr); + + /* Switch to the identity mapping */ + phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); + phys_reset(addr); + + /* Should never get here */ + BUG(); } /* @@ -98,10 +109,19 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - cpu_do_idle(); - local_irq_enable(); + if (cpuidle_idle_call()) { + cpu_do_idle(); + local_irq_enable(); + } } +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + void machine_shutdown(void) { #ifdef CONFIG_SMP @@ -143,15 +163,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); @@ -279,7 +310,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. */ - dsb(); + dsb(ish); /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -290,6 +321,7 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -297,9 +329,11 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c445ff5..0e32ab453e5b 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -15,14 +15,37 @@ #define pr_fmt(fmt) "psci: " fmt +#include <linux/cpuidle.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/smp.h> +#include <linux/slab.h> #include <asm/compiler.h> +#include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); +}; + +static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); @@ -34,6 +57,8 @@ enum psci_function { PSCI_FN_MAX, }; +static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state); + static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_SUCCESS 0 @@ -74,6 +99,17 @@ static u32 psci_power_state_pack(struct psci_power_state state) << PSCI_POWER_STATE_AFFL_SHIFT); } +static void psci_power_state_unpack(u32 power_state, + struct psci_power_state *state) +{ + state->id = (power_state >> PSCI_POWER_STATE_ID_SHIFT) + & PSCI_POWER_STATE_ID_MASK; + state->type = (power_state >> PSCI_POWER_STATE_TYPE_SHIFT) + & PSCI_POWER_STATE_TYPE_MASK; + state->affinity_level = (power_state >> PSCI_POWER_STATE_AFFL_SHIFT) + & PSCI_POWER_STATE_AFFL_MASK; +} + /* * The following two functions are invoked via the invoke_psci_fn pointer * and will not be inlined, allowing us to piggyback on the AAPCS. @@ -156,22 +192,91 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -int __init psci_init(void) +int __init psci_dt_register_idle_states(struct cpuidle_driver *drv, + struct device_node *state_nodes[]) +{ + int cpu, i; + struct psci_power_state *psci_states; + const struct cpu_operations *cpu_ops_ptr; + + if (!state_nodes) + return -EINVAL; + /* + * This is belt-and-braces: make sure that if the idle + * specified protocol is psci, the cpu_ops have been + * initialized to psci operations. Anything else is + * a recipe for mayhem. + */ + for_each_cpu(cpu, drv->cpumask) { + cpu_ops_ptr = cpu_ops[cpu]; + if (WARN_ON(!cpu_ops_ptr || strcmp(cpu_ops_ptr->name, "psci"))) + return -EOPNOTSUPP; + } + + psci_states = kcalloc(drv->state_count, sizeof(*psci_states), + GFP_KERNEL); + + if (!psci_states) { + pr_warn("psci idle state allocation failed\n"); + return -ENOMEM; + } + + for_each_cpu(cpu, drv->cpumask) { + if (per_cpu(psci_power_state, cpu)) { + pr_warn("idle states already initialized on cpu %u\n", + cpu); + continue; + } + per_cpu(psci_power_state, cpu) = psci_states; + } + + + for (i = 0; i < drv->state_count; i++) { + u32 psci_power_state; + + if (!state_nodes[i]) { + /* + * An index with a missing node pointer falls back to + * simple STANDBYWFI + */ + psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY; + continue; + } + + if (of_property_read_u32(state_nodes[i], "entry-method-param", + &psci_power_state)) { + pr_warn(" * %s missing entry-method-param property\n", + state_nodes[i]->full_name); + /* + * If entry-method-param property is missing, fall + * back to STANDBYWFI state + */ + psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY; + continue; + } + + pr_debug("psci-power-state %#x index %u\n", + psci_power_state, i); + psci_power_state_unpack(psci_power_state, &psci_states[i]); + } + + return 0; +} + +void __init psci_init(void) { struct device_node *np; const char *method; u32 id; - int err = 0; np = of_find_matching_node(NULL, psci_of_match); if (!np) - return -ENODEV; + return; pr_info("probing function IDs from device-tree\n"); if (of_property_read_string(np, "method", &method)) { pr_warning("missing \"method\" property\n"); - err = -ENXIO; goto out_put_node; } @@ -181,7 +286,6 @@ int __init psci_init(void) invoke_psci_fn = __invoke_psci_fn_smc; } else { pr_warning("invalid \"method\" property: %s\n", method); - err = -EINVAL; goto out_put_node; } @@ -207,5 +311,85 @@ int __init psci_init(void) out_put_node: of_node_put(np); + return; +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ + return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + return err; } + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} +#endif + +#ifdef CONFIG_ARM64_CPU_SUSPEND +static int cpu_psci_cpu_suspend(unsigned long index) +{ + struct psci_power_state *state = __get_cpu_var(psci_power_state); + + if (!state) + return -EOPNOTSUPP; + + return psci_ops.cpu_suspend(state[index], virt_to_phys(cpu_resume)); +} +#endif + +const struct cpu_operations cpu_psci_ops = { + .name = "psci", + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + .cpu_suspend = cpu_psci_cpu_suspend, +#endif +}; + +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5341534b6d04..0bf195533088 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -41,6 +42,9 @@ #include <asm/traps.h> #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -53,28 +57,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -656,28 +638,27 @@ static int compat_gpr_get(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; switch (idx) { case 15: - reg = (void *)&task_pt_regs(target)->pc; + reg = task_pt_regs(target)->pc; break; case 16: - reg = (void *)&task_pt_regs(target)->pstate; + reg = task_pt_regs(target)->pstate; break; case 17: - reg = (void *)&task_pt_regs(target)->orig_x0; + reg = task_pt_regs(target)->orig_x0; break; default: - reg = (void *)&task_pt_regs(target)->regs[idx]; + reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); - + ret = copy_to_user(ubuf, ®, sizeof(reg)); if (ret) break; - else - ubuf += sizeof(compat_ulong_t); + + ubuf += sizeof(reg); } return ret; @@ -705,28 +686,28 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); switch (idx) { case 15: - reg = (void *)&newregs.pc; + newregs.pc = reg; break; case 16: - reg = (void *)&newregs.pstate; + newregs.pstate = reg; break; case 17: - reg = (void *)&newregs.orig_x0; + newregs.orig_x0 = reg; break; default: - reg = (void *)&newregs.regs[idx]; + newregs.regs[idx] = reg; } - ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - - if (ret) - goto out; - else - ubuf += sizeof(compat_ulong_t); } if (valid_user_regs(&newregs.user_regs)) @@ -734,7 +715,6 @@ static int compat_gpr_set(struct task_struct *target, else ret = -EINVAL; -out: return ret; } @@ -815,33 +795,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1109,45 +1062,49 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; - - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 000000000000..89102a6ffad5 --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..9bafffe12689 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,9 +42,11 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <asm/fixmap.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/cputable.h> +#include <asm/cpu_ops.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -57,9 +59,20 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + static const char *cpu_name; static const char *machine_name; phys_addr_t __fdt_pointer __initdata; @@ -97,15 +110,95 @@ void __init early_print(const char *str, ...) printk("%s", buf); } -static void __init setup_processor(void) +void __init smp_setup_processor_id(void) { - struct cpu_info *cpu_info; + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc.S + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ + struct cpu_info *cpu_info; + u64 features, block; + cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { printk("CPU configuration botched (ID %08x), unable to continue.\n", @@ -118,8 +211,71 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. + */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -257,6 +413,8 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_ioremap_init(); + parse_early_param(); arm64_memblock_init(); @@ -269,8 +427,10 @@ void __init setup_arch(char **cmdline_p) psci_init(); cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); + smp_build_mpidr_hash(); #endif #ifdef CONFIG_VT @@ -288,7 +448,7 @@ static int __init arm64_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -arch_initcall(arm64_device_init); +arch_initcall_sync(arm64_device_init); static DEFINE_PER_CPU(struct cpu, cpu_data); @@ -309,6 +469,12 @@ subsys_initcall(topology_init); static const char *hwcap_str[] = { "fp", "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; @@ -328,9 +494,6 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", - loops_per_jiffy / (500000UL/HZ), - loops_per_jiffy / (5000UL/HZ) % 100); } /* dump out the processor features */ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75dd..e3cf09626245 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/personality.h> @@ -25,7 +26,6 @@ #include <linux/tracehook.h> #include <linux/ratelimit.h> -#include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/cacheflush.h> @@ -100,8 +100,7 @@ static int restore_sigframe(struct pt_regs *regs, { sigset_t set; int i, err; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -121,8 +120,11 @@ static int restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs); - if (err == 0) - err |= restore_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= restore_fpsimd_context(fpsimd_ctx); + } return err; } @@ -167,8 +169,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; + struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &sf->fp, err); @@ -185,12 +187,17 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) - err |= preserve_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= preserve_fpsimd_context(fpsimd_ctx); + aux += sizeof(*fpsimd_ctx); + } /* set the "end" magic */ - __put_user_error(0, &aux->end.magic, err); - __put_user_error(0, &aux->end.size, err); + end = aux; + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); return err; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..e51bbe79f5b5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -100,34 +100,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code. - * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; @@ -474,12 +446,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 000000000000..b1925729c692 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff2 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 // mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm +/* + * Save CPU state for a suspend. This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size. + */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. + * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + bl el2_setup // if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . +/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9c93e126328c..7c868a2ac38b 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/cpu_ops.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -48,76 +49,34 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> +#define CREATE_TRACE_POINTS +#include <trace/events/arm-ipi.h> + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_TIMER, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. - */ -static void __cpuinit write_pen_release(u64 val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; - - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu_logical_map(cpu)); + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); @@ -158,6 +117,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -167,8 +131,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); - /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -177,6 +139,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + printk("CPU%u: Booted secondary processor\n", cpu); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -187,17 +152,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(INVALID_HWID); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* - * Synchronise with the boot thread. + * Enable GIC and timers. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + notify_cpu_starting(cpu); + + smp_store_cpu_info(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -207,11 +170,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - /* - * Enable GIC and timers. - */ - notify_cpu_starting(cpu); - + local_dbg_enable(); local_irq_enable(); local_fiq_enable(); @@ -221,42 +180,117 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_startup_entry(CPUHP_ONLINE); } -void __init smp_cpus_done(unsigned int max_cpus) +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) { - unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; } -void __init smp_prepare_boot_cpu(void) +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) { -} + unsigned int cpu = smp_processor_id(); + int ret; -static void (*smp_cross_call)(const struct cpumask *, unsigned int); + ret = op_cpu_disable(cpu); + if (ret) + return ret; -static const struct smp_enable_ops *enable_ops[] __initconst = { - &smp_spin_table_ops, - &smp_psci_ops, - NULL, -}; + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) -{ - const struct smp_enable_ops **ops = enable_ops; + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); - while (*ops) { - if (!strcmp(name, (*ops)->name)) - return *ops; + return 0; +} + +static DECLARE_COMPLETION(cpu_died); - ops++; +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; } + pr_notice("CPU%u: shutdown\n", cpu); +} - return NULL; +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. + */ + cpu_ops[cpu]->cpu_die(cpu); + + BUG(); } +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ + unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + + pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); +} + +void __init smp_prepare_boot_cpu(void) +{ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); +} + +static void (*smp_cross_call)(const struct cpumask *, unsigned int); /* * Enumerate the possible CPU set from the device tree and build the @@ -265,9 +299,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; - int i, cpu = 1; + unsigned int i, cpu = 1; bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -336,25 +369,10 @@ void __init smp_init_cpus(void) if (cpu >= NR_CPUS) goto next; - /* - * We currently support only the "spin-table" enable-method. - */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - pr_err("%s: missing enable-method property\n", - dn->full_name); + if (cpu_read_ops(dn, cpu) != 0) goto next; - } - - smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); - if (!smp_enable_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); - goto next; - } - - if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) + if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); @@ -384,8 +402,12 @@ next: void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu, err; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); + + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); /* * are we trying to boot more cores than exist? @@ -412,10 +434,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!smp_enable_ops[cpu]) + if (!cpu_ops[cpu]) continue; - err = smp_enable_ops[cpu]->prepare_cpu(cpu); + err = cpu_ops[cpu]->cpu_prepare(cpu); if (err) continue; @@ -446,6 +468,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -455,7 +478,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); @@ -531,6 +554,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -543,6 +574,13 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c533301be77..000000000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/psci.h> -#include <asm/smp_plat.h> - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ - return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ - int err; - - if (!psci_ops.cpu_on) { - pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); - return -ENODEV; - } - - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - - return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { - .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, -}; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa682f76..0347d38eea29 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,15 +16,38 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/delay.h> #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/cputype.h> +#include <asm/smp_plat.h> + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + + +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { /* * Determine the address from which the CPU is polling. @@ -40,7 +63,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) return 0; } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu) { void **release_addr; @@ -48,7 +71,16 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return -ENODEV; release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); + + /* + * We write the release address as LE regardless of the native + * endianess of the kernel. Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianess before jumping. This is mandated by + * the boot protocol. + */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); /* @@ -59,8 +91,24 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + return 0; +} + +const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", - .init_cpu = smp_spin_table_init_cpu, - .prepare_cpu = smp_spin_table_prepare_cpu, + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 048334bb2651..55437ba1f5a4 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); - if (fp < low || fp > high || fp & 0xf) + if (fp < low || fp > high - 0x18 || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 000000000000..1fa9ce4afd8f --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + * must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, + phys_addr_t *save_ptr) +{ + int cpu = smp_processor_id(); + + *save_ptr = virt_to_phys(ptr); + + cpu_do_suspend(ptr); + /* + * Only flush the context that must be retrieved with the MMU + * off. VA primitives ensure the flush is applied to all + * cache levels so context is pushed to DRAM. + */ + __flush_dcache_area(ptr, sizeof(*ptr)); + __flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ + /* Prevent multiple restore hook initializations */ + if (WARN_ON(hw_breakpoint_restore)) + return; + hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ + struct mm_struct *mm = current->active_mm; + int ret, cpu = smp_processor_id(); + unsigned long flags; + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + + /* + * From this point debug exceptions are disabled to prevent + * updates to mdscr register (saved and restored along with + * general purpose registers) from kernel debuggers. + */ + local_dbg_save(flags); + + /* + * mm context saved on the stack, it will be restored when + * the cpu comes out of reset through the identity mapped + * page tables, so that the thread address space is properly + * set-up on function return. + */ + ret = __cpu_suspend(arg); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); + } + + /* + * Restore pstate flags. OS lock and mdscr have been already + * restored, so from this point onwards, debugging is fully + * renabled if it was enabled when core started shutdown. + */ + local_dbg_restore(flags); + + return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ + void *ctx_ptr; + + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sleep_idmap_phys = virt_to_phys(idmap_pg_dir); + __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); + __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + + return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed7467c..423a5b3fc2be 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * extension. */ compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000000..db8bb29c3852 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,590 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/topology.h> +#include <asm/smp_plat.h> + + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { + of_node_put(cpu_node); + return cpu; + } + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + + of_node_put(cpu_node); + return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return -EINVAL; + } + + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int cluster_id __initdata; + int core_id = 0; + int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%s: cpu-map children should be clusters\n", + c->full_name); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, cluster_id, core_id++); + } else { + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; + + return 0; +} + +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + { "arm,cortex-a57", 3891 }, + { "arm,cortex-a53", 2048 }, + { NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. + */ + for_each_possible_cpu(cpu) { + if (cpu_topology[cpu].cluster_id == -1) { + pr_err("CPU%d: No topology information specified\n", + cpu); + ret = -EINVAL; + } + } + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + +static void __init parse_dt_cpu_power(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn; + unsigned long min_capacity = ULONG_MAX; + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int cpu; + + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + + /* Too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("Missing device node for CPU %d\n", cpu); + continue; + } + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) { + pr_warn("%s: Unknown CPU type\n", cn->full_name); + continue; + } + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s: Missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equal we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + return; + else if (4 * max_capacity < (3 * (max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; +} + +/* + * Look for a customed capacity of a CPU in the cpu_topo_data table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +static void update_cpu_power(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + pr_info("CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + if (cpuid_topo->cluster_id == -1) { + /* + * DT does not contain topology information for this cpu. + */ + pr_debug("CPU%u: No topology information configured\n", cpuid); + return; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +#ifdef CONFIG_SCHED_HMP + +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} + +static const char * const little_cores[] = { + "arm,cortex-a53", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) + return true; + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 8) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr+1)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr+1)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. + */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct cpumask hmp_fast_cpu_mask; + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. + */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) { + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + } + + return -EINVAL; +} + +void store_cpu_topology(unsigned int cpuid) +{ + update_siblings_masks(cpuid); + update_cpu_power(cpuid); +} + +static void __init reset_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->cluster_id = -1; + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + } +} + +static void __init reset_cpu_power(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + set_power_scale(cpu, SCHED_POWER_SCALE); +} + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (parse_dt_topology()) + reset_cpu_topology(); + + reset_cpu_power(); + parse_dt_cpu_power(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include <linux/syscalls.h> #include <asm/atomic.h> +#include <asm/debug-monitors.h> #include <asm/traps.h> #include <asm/stacktrace.h> #include <asm/exception.h> @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 0ea7a22bcdf2..50384fec56c4 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); @@ -103,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) static int __init vdso_init(void) { - struct page *pg; - char *vbase; - int i, ret = 0; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (vdso_pagelist == NULL) { - pr_err("Failed to allocate vDSO pagelist!\n"); + if (vdso_pagelist == NULL) return -ENOMEM; - } /* Grab the vDSO code pages. */ - for (i = 0; i < vdso_pages; i++) { - pg = virt_to_page(&vdso_start + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso_pagelist[i] = pg; - } - - /* Sanity check the shared object header. */ - vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); - if (vbase == NULL) { - pr_err("Failed to map vDSO pagelist!\n"); - return -ENOMEM; - } else if (memcmp(vbase, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - ret = -EINVAL; - goto unmap; - } + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Grab the vDSO data page. */ - pg = virt_to_page(vdso_data); - get_page(pg); - vdso_pagelist[i] = pg; + vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: - vunmap(vbase); - return ret; + return 0; } arch_initcall(vdso_init); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..18a08e10357f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -41,7 +41,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -56,7 +55,8 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) + NOTES _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -82,41 +82,12 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - _edata_loc = __data_loc + SIZEOF(.data); - - NOTES + . = ALIGN(PAGE_SIZE); + _data = .; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; BSS_SECTION(0, 0, 0) _end = .; diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 59acc0ef0462..328ce1a99daa 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,6 +1,4 @@ -lib-y := bitops.o delay.o \ - strncpy_from_user.o strnlen_user.o clear_user.o \ - copy_from_user.o copy_to_user.o copy_in_user.o \ - copy_page.o clear_page.o \ - memchr.o memcpy.o memmove.o memset.o \ +lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ + copy_to_user.o copy_in_user.o copy_page.o \ + clear_page.o memchr.o memcpy.o memmove.o memset.o \ strchr.o strrchr.o diff --git a/arch/arm64/lib/strncpy_from_user.S b/arch/arm64/lib/strncpy_from_user.S deleted file mode 100644 index 56e448a831a0..000000000000 --- a/arch/arm64/lib/strncpy_from_user.S +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Based on arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * x0 = dst, x1 = src, x2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov x4, x1 -1: subs x2, x2, #1 - bmi 2f -USER(9f, ldrb w3, [x1], #1 ) - strb w3, [x0], #1 - cbnz w3, 1b - sub x1, x1, #1 // take NUL character out of count -2: sub x0, x1, x4 - ret -ENDPROC(__strncpy_from_user) - - .section .fixup,"ax" - .align 0 -9: strb wzr, [x0] // null terminate - mov x0, #-EFAULT - ret - .previous diff --git a/arch/arm64/lib/strnlen_user.S b/arch/arm64/lib/strnlen_user.S deleted file mode 100644 index 7f7b176a5646..000000000000 --- a/arch/arm64/lib/strnlen_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Based on arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n if too long - */ -ENTRY(__strnlen_user) - mov x2, x0 -1: subs x1, x1, #1 - b.mi 2f -USER(9f, ldrb w3, [x0], #1 ) - cbnz w3, 1b -2: sub x0, x0, x2 - ret -ENDPROC(__strnlen_user) - - .section .fixup,"ax" - .align 0 -9: mov x0, #0 - ret - .previous diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 48a386094fa3..0e379c44544b 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -30,7 +30,7 @@ * * Corrupted registers: x0-x7, x9-x11 */ -ENTRY(__flush_dcache_all) +__flush_dcache_all: dsb sy // ensure ordering with previous memory accesses mrs x0, clidr_el1 // read clidr and x3, x0, #0x7000000 // extract loc from clidr @@ -146,7 +146,7 @@ ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) /* - * __flush_kern_dcache_page(kaddr) + * __flush_dcache_area(kaddr, size) * * Ensure that the data held in the page kaddr is written back to the * page in question. @@ -166,3 +166,88 @@ ENTRY(__flush_dcache_area) dsb sy ret ENDPROC(__flush_dcache_area) + +/* + * __dma_inv_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_inv_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + tst x1, x3 // end cache line aligned? + bic x1, x1, x3 + b.eq 1f + dc civac, x1 // clean & invalidate D / U line +1: tst x0, x3 // start cache line aligned? + bic x0, x0, x3 + b.eq 2f + dc civac, x0 // clean & invalidate D / U line + b 3f +2: dc ivac, x0 // invalidate D / U line +3: add x0, x0, x2 + cmp x0, x1 + b.lo 2b + dsb sy + ret +ENDPROC(__dma_inv_range) + +/* + * __dma_clean_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_clean_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc cvac, x0 // clean D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_clean_range) + +/* + * __dma_flush_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(__dma_flush_range) + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 // clean & invalidate D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_flush_range) + +/* + * __dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_map_area) + add x1, x1, x0 + cmp w2, #DMA_FROM_DEVICE + b.eq __dma_inv_range + b __dma_clean_range +ENDPROC(__dma_map_area) + +/* + * __dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_unmap_area) + add x1, x1, x0 + cmp w2, #DMA_TO_DEVICE + b.ne __dma_inv_range + ret +ENDPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bd7579ec9e6..f39a55d58918 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -21,34 +21,277 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/dma-mapping.h> +#include <linux/dma-contiguous.h> +#include <linux/of.h> +#include <linux/platform_device.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> +#include <linux/amba/bus.h> #include <asm/cacheflush.h> struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, + bool coherent) { - if (IS_ENABLED(CONFIG_ZONE_DMA32) && + if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + return pgprot_writecombine(prot); + else if (!coherent) + return pgprot_dmacoherent(prot); + return prot; +} + +static void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) - flags |= GFP_DMA32; - return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + flags |= GFP_DMA; + if (IS_ENABLED(CONFIG_DMA_CMA)) { + struct page *page; + + size = PAGE_ALIGN(size); + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size)); + if (!page) + return NULL; + + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + return page_address(page); + } else { + return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + } +} + +static void __dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + if (dev == NULL) { + WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); + return; + } + + if (IS_ENABLED(CONFIG_DMA_CMA)) { + phys_addr_t paddr = dma_to_phys(dev, dma_handle); + + dma_release_from_contiguous(dev, + phys_to_page(paddr), + size >> PAGE_SHIFT); + } else { + swiotlb_free_coherent(dev, size, vaddr, dma_handle); + } +} + +static void *__dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + struct page *page, **map; + void *ptr, *coherent_ptr; + int order, i; + + size = PAGE_ALIGN(size); + order = get_order(size); + + ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); + if (!ptr) + goto no_mem; + map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA); + if (!map) + goto no_map; + + /* remove any dirty cache lines on the kernel alias */ + __dma_flush_range(ptr, ptr + size); + + /* create a coherent mapping */ + page = virt_to_page(ptr); + for (i = 0; i < (size >> PAGE_SHIFT); i++) + map[i] = page + i; + coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, + __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false)); + kfree(map); + if (!coherent_ptr) + goto no_map; + + return coherent_ptr; + +no_map: + __dma_free_coherent(dev, size, ptr, *dma_handle, attrs); +no_mem: + *dma_handle = ~0; + return NULL; +} + +static void __dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + + vunmap(vaddr); + __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); +} + +static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + dma_addr_t dev_addr; + + dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + + return dev_addr; +} + + +static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i, ret; + + ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); + for_each_sg(sgl, sg, ret, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + + return ret; +} + +static void __swiotlb_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); } -static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __swiotlb_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) { - swiotlb_free_coherent(dev, size, vaddr, dma_handle); + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); } -static struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = arm64_swiotlb_alloc_coherent, - .free = arm64_swiotlb_free_coherent, +static void __swiotlb_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + swiotlb_sync_single_for_device(dev, dev_addr, size, dir); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); +} + +static void __swiotlb_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); +} + +static void __swiotlb_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); +} + +/* vma->vm_page_prot must be set appropriately before calling this function */ +static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> + PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + + return ret; +} + +static int __swiotlb_mmap_noncoherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false); + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + +static int __swiotlb_mmap_coherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + /* Just use whatever page_prot attributes were specified */ + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + +struct dma_map_ops noncoherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_noncoherent, + .free = __dma_free_noncoherent, + .mmap = __swiotlb_mmap_noncoherent, + .map_page = __swiotlb_map_page, + .unmap_page = __swiotlb_unmap_page, + .map_sg = __swiotlb_map_sg_attrs, + .unmap_sg = __swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, + .sync_single_for_device = __swiotlb_sync_single_for_device, + .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = __swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; +EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); + +struct dma_map_ops coherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_coherent, + .free = __dma_free_coherent, + .mmap = __swiotlb_mmap_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, @@ -60,12 +303,47 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; +EXPORT_SYMBOL(coherent_swiotlb_dma_ops); + +static int dma_bus_notifier(struct notifier_block *nb, + unsigned long event, void *_dev) +{ + struct device *dev = _dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (of_property_read_bool(dev->of_node, "dma-coherent")) + set_dma_ops(dev, &coherent_swiotlb_dma_ops); + + return NOTIFY_OK; +} + +static struct notifier_block platform_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + +static struct notifier_block amba_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + +extern int swiotlb_late_init_with_default_size(size_t default_size); -void __init arm64_swiotlb_init(void) +static int __init swiotlb_late_init(void) { - dma_ops = &arm64_swiotlb_dma_ops; - swiotlb_init(1); + size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); + + /* + * These must be registered before of_platform_populate(). + */ + bus_register_notifier(&platform_bus_type, &platform_bus_nb); + bus_register_notifier(&amba_bustype, &amba_bus_nb); + + dma_ops = &noncoherent_swiotlb_dma_ops; + + return swiotlb_late_init_with_default_size(swiotlb_size); } +arch_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f51d669c8ebd..df4f2fd187c3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; @@ -359,17 +359,6 @@ static int __kprobes do_translation_fault(unsigned long addr, } /* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - -/* * This abort handler always returns "fault". */ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -392,12 +381,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 7c716634a671..e4193e3adc7f 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -70,11 +70,6 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, #endif } -void __flush_dcache_page(struct page *page) -{ - __flush_dcache_area(page_address(page), PAGE_SIZE); -} - void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); @@ -84,7 +79,7 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) return; if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_page(page); + __flush_dcache_area(page_address(page), PAGE_SIZE); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..5c47534fe47d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,8 @@ #include <linux/memblock.h> #include <linux/sort.h> #include <linux/of_fdt.h> +#include <linux/dma-mapping.h> +#include <linux/dma-contiguous.h> #include <asm/prom.h> #include <asm/sections.h> @@ -44,8 +46,7 @@ static unsigned long phys_initrd_size __initdata = 0; phys_addr_t memstart_addr __read_mostly = 0; -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; @@ -67,22 +68,22 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) - static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); -#ifdef CONFIG_ZONE_DMA32 /* 4GB maximum for 32-bit only capable devices */ - max_dma32 = max(min, min(max, MAX_DMA32_PFN)); - zone_size[ZONE_DMA32] = max_dma32 - min; -#endif - zone_size[ZONE_NORMAL] = max - max_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + unsigned long max_dma_phys = + (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1); + max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + zone_size[ZONE_DMA] = max_dma - min; + } + zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -92,15 +93,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; -#ifdef CONFIG_ZONE_DMA32 - if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + + if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { + unsigned long dma_end = min(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; } -#endif - if (end > max_dma32) { + + if (end > max_dma) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma32); + unsigned long normal_start = max(start, max_dma); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } @@ -173,6 +174,8 @@ void __init arm64_memblock_init(void) memblock_reserve(base, size); } + dma_contiguous_reserve(0); + memblock_allow_resize(); memblock_dump_all(); } @@ -283,8 +286,6 @@ void __init mem_init(void) unsigned long reserved_pages, free_pages; struct memblock_region *reg; - arm64_swiotlb_init(); - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 1725cd6db37a..00d315ae1de9 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -25,6 +25,10 @@ #include <linux/vmalloc.h> #include <linux/io.h> +#include <asm/fixmap.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, pgprot_t prot, void *caller) { @@ -82,3 +86,95 @@ void __iounmap(volatile void __iomem *io_addr) vunmap(addr); } EXPORT_SYMBOL(__iounmap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + /* For normal memory we already have a cacheable mapping. */ + if (pfn_valid(__phys_to_pfn(phys_addr))) + return (void __iomem *)__phys_to_virt(phys_addr); + + return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +#ifndef CONFIG_ARM64_64K_PAGES +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#endif + +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = pgd_offset_k(addr); + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * __init early_ioremap_pte(unsigned long addr) +{ + pmd_t *pmd = early_ioremap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_ioremap_init(void) +{ + pmd_t *pmd; + + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); +#ifndef CONFIG_ARM64_64K_PAGES + /* need to populate pmd for 4k pagesize only */ + pmd_populate_kernel(&init_mm, pmd, bm_pte); +#endif + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p\n", + pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", + FIX_BTMAP_BEGIN); + } + + early_ioremap_setup(); +} + +void __init __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = early_ioremap_pte(addr); + + if (pgprot_val(flags)) + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index 916701e6d040..d519f4f50c8c 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,3 +1,2 @@ -extern void __flush_dcache_page(struct page *page); extern void __init bootmem_init(void); extern void __init arm64_swiotlb_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ba7477efad5c..28e56293ad4f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,11 +43,6 @@ struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); -pgprot_t pgprot_default; -EXPORT_SYMBOL(pgprot_default); - -static pmdval_t prot_sect_kernel; - struct cachepolicy { const char policy[16]; u64 mair; @@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p) } early_param("cachepolicy", early_cachepolicy); -/* - * Adjust the PMD section entries according to the CPU in use. - */ -static void __init init_mem_pgprot(void) -{ - pteval_t default_pgprot; - int i; - - default_pgprot = PTE_ATTRINDX(MT_NORMAL); - prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL); - -#ifdef CONFIG_SMP - /* - * Mark memory with the "shared" attribute for SMP systems - */ - default_pgprot |= PTE_SHARED; - prot_sect_kernel |= PMD_SECT_S; -#endif - - for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); - protection_map[i] = __pgprot(v | default_pgprot); - } - - pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot); -} - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -205,7 +173,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect_kernel)); + set_pmd(pmd, __pmd(phys | PROT_SECT_NORMAL_EXEC)); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -260,50 +228,22 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, } while (pgd++, addr = next, addr != end); } -#ifdef CONFIG_EARLY_PRINTK -/* - * Create an early I/O mapping using the pgd/pmd entries already populated - * in head.S as this function is called too early to allocated any memory. The - * mapping size is 2MB with 4KB pages or 64KB or 64KB pages. - */ -void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) +static void __init map_mem(void) { - unsigned long size, mask; - bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES); - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + struct memblock_region *reg; + phys_addr_t limit; /* - * No early pte entries with !ARM64_64K_PAGES configuration, so using - * sections (pmd). + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * aligned to 2MB as per Documentation/arm64/booting.txt). */ - size = page64k ? PAGE_SIZE : SECTION_SIZE; - mask = ~(size - 1); - - pgd = pgd_offset_k(virt); - pud = pud_offset(pgd, virt); - if (pud_none(*pud)) - return NULL; - pmd = pmd_offset(pud, virt); - - if (page64k) { - if (pmd_none(*pmd)) - return NULL; - pte = pte_offset_kernel(pmd, virt); - set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE)); - } else { - set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE)); - } - - return (void __iomem *)((virt & mask) + (phys & ~mask)); -} -#endif - -static void __init map_mem(void) -{ - struct memblock_region *reg; + limit = PHYS_OFFSET + PGDIR_SIZE; + memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -313,8 +253,27 @@ static void __init map_mem(void) if (start >= end) break; +#ifndef CONFIG_ARM64_64K_PAGES + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. + * When 64K pages are enabled, the pte page table for the + * first PGDIR_SIZE is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, PMD_SIZE); + if (end < limit) { + limit = end & PMD_MASK; + memblock_set_current_limit(limit); + } +#endif + create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -325,13 +284,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - - init_mem_pgprot(); map_mem(); /* @@ -430,7 +382,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel)); + set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7083cdada657..62c6101df260 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -32,17 +32,10 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *new_pgd; - if (PGD_SIZE == PAGE_SIZE) - new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL); else - new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); - - if (!new_pgd) - return NULL; - - return new_pgd; + return kzalloc(PGD_SIZE, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 8957b822010b..005d29e2977d 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -38,8 +38,7 @@ */ .macro dcache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR - lsr \tmp, \tmp, #16 - and \tmp, \tmp, #0xf // cache line size encoding + ubfm \tmp, \tmp, #16, #19 // cache line size encoding mov \reg, #4 // bytes per word lsl \reg, \reg, \tmp // actual cache line size .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f84fcf71f129..e0ef63cd05dc 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -80,8 +80,77 @@ ENTRY(cpu_do_idle) ret ENDPROC(cpu_do_idle) +#ifdef CONFIG_ARM64_CPU_SUSPEND +/** + * cpu_do_suspend - save CPU registers context + * + * x0: virtual address of context pointer + */ +ENTRY(cpu_do_suspend) + mrs x2, tpidr_el0 + mrs x3, tpidrro_el0 + mrs x4, contextidr_el1 + mrs x5, mair_el1 + mrs x6, cpacr_el1 + mrs x7, ttbr1_el1 + mrs x8, tcr_el1 + mrs x9, vbar_el1 + mrs x10, mdscr_el1 + mrs x11, oslsr_el1 + mrs x12, sctlr_el1 + stp x2, x3, [x0] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + str x12, [x0, #80] + ret +ENDPROC(cpu_do_suspend) + +/** + * cpu_do_resume - restore CPU register context + * + * x0: Physical address of context pointer + * x1: ttbr0_el1 to be restored + * + * Returns: + * sctlr_el1 value in x0 + */ +ENTRY(cpu_do_resume) + /* + * Invalidate local tlb entries before turning on MMU + */ + tlbi vmalle1 + ldp x2, x3, [x0] + ldp x4, x5, [x0, #16] + ldp x6, x7, [x0, #32] + ldp x8, x9, [x0, #48] + ldp x10, x11, [x0, #64] + ldr x12, [x0, #80] + msr tpidr_el0, x2 + msr tpidrro_el0, x3 + msr contextidr_el1, x4 + msr mair_el1, x5 + msr cpacr_el1, x6 + msr ttbr0_el1, x1 + msr ttbr1_el1, x7 + msr tcr_el1, x8 + msr vbar_el1, x9 + msr mdscr_el1, x10 + /* + * Restore oslsr_el1 by writing oslar_el1 + */ + ubfx x11, x11, #1, #1 + msr oslar_el1, x11 + mov x0, x12 + dsb nsh // Make sure local tlb invalidation completed + isb + ret +ENDPROC(cpu_do_resume) +#endif + /* - * cpu_switch_mm(pgd_phys, tsk) + * cpu_do_switch_mm(pgd_phys, tsk) * * Set the translation table base pointer to be pgd_phys. * @@ -104,19 +173,13 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - /* - * Preserve the link register across the function call. - */ - mov x28, lr - bl __flush_dcache_all - mov lr, x28 ic iallu // I+BTB cache invalidate + tlbi vmalle1is // invalidate I + D TLBs dsb sy mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD msr mdscr_el1, xzr // Reset mdscr_el1 - tlbi vmalle1is // invalidate I + D TLBs /* * Memory region attributes for LPAE: * @@ -147,7 +210,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ - TCR_ASID16 | (1 << 31) + TCR_ASID16 | TCR_TBI0 | (1 << 31) #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K @@ -162,9 +225,9 @@ ENDPROC(__cpu_setup) * CE0 XWHW CZ ME TEEA S * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved - * .... .100 .... 01.1 11.1 ..01 0001 1101 < software settings + * .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings */ .type crval, #object crval: - .word 0x030802e2 // clear + .word 0x000802e2 // clear .word 0x0405d11d // set diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S index 8ae80a18e8ec..19da91e0cd27 100644 --- a/arch/arm64/mm/tlb.S +++ b/arch/arm64/mm/tlb.S @@ -35,7 +35,7 @@ */ ENTRY(__cpu_flush_user_tlb_range) vma_vm_mm x3, x2 // get vma->vm_mm - mmid x3, x3 // get vm_mm->context.id + mmid w3, x3 // get vm_mm->context.id dsb sy lsr x0, x0, #12 // align address lsr x1, x1, #12 diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h index 8a029505d7b7..2f1c3c2657ad 100644 --- a/arch/blackfin/include/asm/ftrace.h +++ b/arch/blackfin/include/asm/ftrace.h @@ -66,16 +66,7 @@ extern inline void *return_address(unsigned int level) #endif /* CONFIG_FRAME_POINTER */ -#define HAVE_ARCH_CALLER_ADDR - -/* inline function or macro may lead to unexpected result */ -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)return_address(1)) -#define CALLER_ADDR2 ((unsigned long)return_address(2)) -#define CALLER_ADDR3 ((unsigned long)return_address(3)) -#define CALLER_ADDR4 ((unsigned long)return_address(4)) -#define CALLER_ADDR5 ((unsigned long)return_address(5)) -#define CALLER_ADDR6 ((unsigned long)return_address(6)) +#define ftrace_return_address(n) return_address(n) #endif /* __ASSEMBLY__ */ diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c index bdb56f09d0ac..287d0e64dfba 100644 --- a/arch/c6x/kernel/devicetree.c +++ b/arch/c6x/kernel/devicetree.c @@ -33,8 +33,7 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b8455c44c..bdc48111f0df 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -419,10 +419,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 0a2c68f9f9b0..62e2e8f2c5d6 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -136,8 +136,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 5712bb532245..32b87882ac87 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -58,8 +58,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c index 5869e3fa5dd3..150215a91711 100644 --- a/arch/openrisc/kernel/prom.c +++ b/arch/openrisc/kernel/prom.c @@ -96,8 +96,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index 72c0fafaa039..544ed8ef87eb 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -24,15 +24,7 @@ extern void return_to_handler(void); extern unsigned long return_address(unsigned int); -#define HAVE_ARCH_CALLER_ADDR - -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 return_address(1) -#define CALLER_ADDR2 return_address(2) -#define CALLER_ADDR3 return_address(3) -#define CALLER_ADDR4 return_address(4) -#define CALLER_ADDR5 return_address(5) -#define CALLER_ADDR6 return_address(6) +#define ftrace_return_address(n) return_address(n) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index bc2da154f68b..ac204e022922 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, extern void kdump_move_device_tree(void); -/* CPU OF node matching */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); - /* cache lookup */ struct device_node *of_find_next_cache_node(struct device_node *np); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..eb4de78b7d9b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -550,8 +550,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); @@ -827,49 +826,10 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* Find the device node for a given logical cpu number, also returns the cpu - * local thread number (index in ibm,interrupt-server#s) if relevant and - * asked for (non NULL) - */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - int hardid; - struct device_node *np; - - hardid = get_hard_smp_processor_id(cpu); - - for_each_node_by_type(np, "cpu") { - const u32 *intserv; - unsigned int plen, t; - - /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist - * fallback to "reg" property and assume no threads - */ - intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", - &plen); - if (intserv == NULL) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg == NULL) - continue; - if (*reg == hardid) { - if (thread) - *thread = 0; - return np; - } - } else { - plen /= sizeof(u32); - for (t = 0; t < plen; t++) { - if (hardid == intserv[t]) { - if (thread) - *thread = t; - return np; - } - } - } - } - return NULL; + return (int)phys_id == get_hard_smp_processor_id(cpu); } -EXPORT_SYMBOL(of_get_cpu_node); #if defined(CONFIG_DEBUG_FS) && defined(DEBUG) static struct debugfs_blob_wrapper flat_dt_blob; diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 13e9966464c2..e79fb6ebaa42 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -40,15 +40,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) /* arch/sh/kernel/return_address.c */ extern void *return_address(unsigned int); -#define HAVE_ARCH_CALLER_ADDR - -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)return_address(1)) -#define CALLER_ADDR2 ((unsigned long)return_address(2)) -#define CALLER_ADDR3 ((unsigned long)return_address(3)) -#define CALLER_ADDR4 ((unsigned long)return_address(4)) -#define CALLER_ADDR5 ((unsigned long)return_address(5)) -#define CALLER_ADDR6 ((unsigned long)return_address(6)) +#define ftrace_return_address(n) return_address(n) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..787072769a80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h index c09241659971..b4b38bacb404 100644 --- a/arch/x86/include/asm/dma-contiguous.h +++ b/arch/x86/include/asm/dma-contiguous.h @@ -4,7 +4,6 @@ #ifdef __KERNEL__ #include <linux/types.h> -#include <asm-generic/dma-contiguous.h> static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..2fbad6b9f23c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,169 +16,6 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -240,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h deleted file mode 100644 index 36dc7a684397..000000000000 --- a/arch/xtensa/include/asm/ftrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/xtensa/include/asm/ftrace.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2013 Tensilica Inc. - */ -#ifndef _XTENSA_FTRACE_H -#define _XTENSA_FTRACE_H - -#include <asm/processor.h> - -#define HAVE_ARCH_CALLER_ADDR -#define CALLER_ADDR0 ({ unsigned long a0, a1; \ - __asm__ __volatile__ ( \ - "mov %0, a0\n" \ - "mov %1, a1\n" \ - : "=r"(a0), "=r"(a1) : : ); \ - MAKE_PC_FROM_RA(a0, a1); }) -#ifdef CONFIG_FRAME_POINTER -extern unsigned long return_address(unsigned level); -#define CALLER_ADDR1 return_address(1) -#define CALLER_ADDR2 return_address(2) -#define CALLER_ADDR3 return_address(3) -#else -#define CALLER_ADDR1 (0) -#define CALLER_ADDR2 (0) -#define CALLER_ADDR3 (0) -#endif - -#endif /* _XTENSA_FTRACE_H */ diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 14c6c3a6f04b..a5214542f312 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -170,8 +170,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (void *)__va(start); initrd_end = (void *)__va(end); |