aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r--arch/arm/kernel/entry-armv.S1
-rw-r--r--arch/arm/kernel/kprobes-test-arm.c4
-rw-r--r--arch/arm/kernel/kprobes-thumb.c2
-rw-r--r--arch/arm/kernel/perf_event.c2
-rw-r--r--arch/arm/kernel/ptrace.c3
-rw-r--r--arch/arm/kernel/signal.c46
-rw-r--r--arch/arm/kernel/signal.h2
-rw-r--r--arch/arm/kernel/topology.c278
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm/kernel/vmlinux.lds.S2
10 files changed, 307 insertions, 35 deletions
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 437f0c42651..0d1851ca6eb 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -495,6 +495,7 @@ ENDPROC(__und_usr)
* The out of line fixup for the ldrt above.
*/
.pushsection .fixup, "ax"
+ .align 2
4: mov pc, r9
.popsection
.pushsection __ex_table,"a"
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
index ba32b393b3f..38c1a3b103a 100644
--- a/arch/arm/kernel/kprobes-test-arm.c
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -187,8 +187,8 @@ void kprobe_arm_test_cases(void)
TEST_BF_R ("mov pc, r",0,2f,"")
TEST_BF_RR("mov pc, r",0,2f,", asl r",1,0,"")
TEST_BB( "sub pc, pc, #1b-2b+8")
-#if __LINUX_ARM_ARCH__ >= 6
- TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */
+#if __LINUX_ARM_ARCH__ == 6 && !defined(CONFIG_CPU_V7)
+ TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before and after ARMv6 */
#endif
TEST_BB_R( "sub pc, pc, r",14, 1f-2f+8,"")
TEST_BB_R( "rsb pc, r",14,1f-2f+8,", pc")
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 8f96ec778e8..6123daf397a 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -660,7 +660,7 @@ static const union decode_item t32_table_1111_100x[] = {
/* LDRSB (literal) 1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
/* LDRH (literal) 1111 1000 x011 1111 xxxx xxxx xxxx xxxx */
/* LDRSH (literal) 1111 1001 x011 1111 xxxx xxxx xxxx xxxx */
- DECODE_EMULATEX (0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal,
+ DECODE_SIMULATEX(0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal,
REGS(PC, NOSPPCX, 0, 0, 0)),
/* STRB (immediate) 1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 186c8cb982c..a02eada3aa5 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -503,7 +503,7 @@ __hw_perf_event_init(struct perf_event *event)
event_requires_mode_exclusion(&event->attr)) {
pr_debug("ARM performance counters do not support "
"mode exclusion\n");
- return -EPERM;
+ return -EOPNOTSUPP;
}
/*
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 5700a7ae7f0..14e38261cd3 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -25,7 +25,6 @@
#include <linux/regset.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
-#include <linux/unistd.h>
#include <asm/pgtable.h>
#include <asm/traps.h>
@@ -918,8 +917,6 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0,
regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
- if (why == 0 && test_and_clear_thread_flag(TIF_SYSCALL_RESTARTSYS))
- scno = __NR_restart_syscall - __NR_SYSCALL_BASE;
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return scno;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index fd2392a17ac..536c5d6b340 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -27,6 +27,7 @@
*/
#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE))
#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE))
+#define SWI_SYS_RESTART (0xef000000|__NR_restart_syscall|__NR_OABI_SYSCALL_BASE)
/*
* With EABI, the syscall number has to be loaded into r7.
@@ -47,6 +48,18 @@ const unsigned long sigreturn_codes[7] = {
};
/*
+ * Either we support OABI only, or we have EABI with the OABI
+ * compat layer enabled. In the later case we don't know if
+ * user space is EABI or not, and if not we must not clobber r7.
+ * Always using the OABI syscall solves that issue and works for
+ * all those cases.
+ */
+const unsigned long syscall_restart_code[2] = {
+ SWI_SYS_RESTART, /* swi __NR_restart_syscall */
+ 0xe49df004, /* ldr pc, [sp], #4 */
+};
+
+/*
* atomically swap in the new signal mask, and wait for a signal.
*/
asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask)
@@ -592,10 +605,12 @@ static void do_signal(struct pt_regs *regs, int syscall)
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- case -ERESTART_RESTARTBLOCK:
regs->ARM_r0 = regs->ARM_ORIG_r0;
regs->ARM_pc = restart_addr;
break;
+ case -ERESTART_RESTARTBLOCK:
+ regs->ARM_r0 = -EINTR;
+ break;
}
}
@@ -611,14 +626,12 @@ static void do_signal(struct pt_regs *regs, int syscall)
* debugger has chosen to restart at a different PC.
*/
if (regs->ARM_pc == restart_addr) {
- if (retval == -ERESTARTNOHAND ||
- retval == -ERESTART_RESTARTBLOCK
+ if (retval == -ERESTARTNOHAND
|| (retval == -ERESTARTSYS
&& !(ka.sa.sa_flags & SA_RESTART))) {
regs->ARM_r0 = -EINTR;
regs->ARM_pc = continue_addr;
}
- clear_thread_flag(TIF_SYSCALL_RESTARTSYS);
}
handle_signal(signr, &ka, &info, regs);
@@ -632,8 +645,29 @@ static void do_signal(struct pt_regs *regs, int syscall)
* ignore the restart.
*/
if (retval == -ERESTART_RESTARTBLOCK
- && regs->ARM_pc == restart_addr)
- set_thread_flag(TIF_SYSCALL_RESTARTSYS);
+ && regs->ARM_pc == continue_addr) {
+ if (thumb_mode(regs)) {
+ regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE;
+ regs->ARM_pc -= 2;
+ } else {
+#if defined(CONFIG_AEABI) && !defined(CONFIG_OABI_COMPAT)
+ regs->ARM_r7 = __NR_restart_syscall;
+ regs->ARM_pc -= 4;
+#else
+ u32 __user *usp;
+
+ regs->ARM_sp -= 4;
+ usp = (u32 __user *)regs->ARM_sp;
+
+ if (put_user(regs->ARM_pc, usp) == 0) {
+ regs->ARM_pc = KERN_RESTART_CODE;
+ } else {
+ regs->ARM_sp += 4;
+ force_sigsegv(0, current);
+ }
+#endif
+ }
+ }
}
restore_saved_sigmask();
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
index 5ff067b7c75..6fcfe8398aa 100644
--- a/arch/arm/kernel/signal.h
+++ b/arch/arm/kernel/signal.h
@@ -8,5 +8,7 @@
* published by the Free Software Foundation.
*/
#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500)
+#define KERN_RESTART_CODE (KERN_SIGRETURN_CODE + sizeof(sigreturn_codes))
extern const unsigned long sigreturn_codes[7];
+extern const unsigned long syscall_restart_code[2];
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 8200deaa14f..61302df56ef 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -17,11 +17,160 @@
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
+#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+/*
+ * cpu power scale management
+ */
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit. The final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 2*SCHED_POWER_SCALE.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+struct cpu_efficiency table_efficiency[] = {
+ {"arm,cortex-a15", 3891},
+ {"arm,cortex-a7", 2048},
+ {NULL, },
+};
+
+struct cpu_capacity {
+ unsigned long hwid;
+ unsigned long capacity;
+};
+
+struct cpu_capacity *cpu_capacity;
+
+unsigned long middle_capacity = 1;
+
+static void __init parse_dt_topology(void)
+{
+ struct cpu_efficiency *cpu_eff;
+ struct device_node *cn = NULL;
+ unsigned long min_capacity = (unsigned long)(-1);
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int alloc_size, cpu = 0;
+
+ alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity);
+ cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT);
+
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+ const u32 *rate, *reg;
+ int len;
+
+ if (cpu >= num_possible_cpus())
+ break;
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL)
+ continue;
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ reg = of_get_property(cn, "reg", &len);
+ if (!reg || len != 4) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity[cpu].capacity = capacity;
+ cpu_capacity[cpu++].hwid = be32_to_cpup(reg);
+ }
+
+ if (cpu < num_possible_cpus())
+ cpu_capacity[cpu].hwid = (unsigned long)(-1);
+
+ middle_capacity = (min_capacity + max_capacity) >> 11;
+}
+
+void update_cpu_power(unsigned int cpu, unsigned long hwid)
+{
+ unsigned int idx = 0;
+
+ /* look for the cpu's hwid in the cpu capacity table */
+ for (idx = 0; idx < num_possible_cpus(); idx++) {
+ if (cpu_capacity[idx].hwid == hwid)
+ break;
+
+ if (cpu_capacity[idx].hwid == -1)
+ return;
+ }
+
+ if (idx == num_possible_cpus())
+ return;
+
+ set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity);
+
+ printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
+#endif
+
+
+/*
+ * cpu topology management
+ */
+
#define MPIDR_SMP_BITMASK (0x3 << 30)
#define MPIDR_SMP_VALUE (0x2 << 30)
@@ -31,6 +180,7 @@
* These masks reflect the current use of the affinity levels.
* The affinity level can be up to 16 bits according to ARM ARM
*/
+#define MPIDR_HWID_BITMASK 0xFFFFFF
#define MPIDR_LEVEL0_MASK 0x3
#define MPIDR_LEVEL0_SHIFT 0
@@ -41,6 +191,9 @@
#define MPIDR_LEVEL2_MASK 0xFF
#define MPIDR_LEVEL2_SHIFT 16
+/*
+ * cpu topology table
+ */
struct cputopo_arm cpu_topology[NR_CPUS];
const struct cpumask *cpu_coregroup_mask(int cpu)
@@ -48,6 +201,32 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
return &cpu_topology[cpu].core_sibling;
}
+void update_siblings_masks(unsigned int cpuid)
+{
+ struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->socket_id != cpu_topo->socket_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+ smp_wmb();
+}
+
/*
* store_cpu_topology is called at boot when only one cpu is running
* and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
@@ -57,7 +236,6 @@ void store_cpu_topology(unsigned int cpuid)
{
struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
unsigned int mpidr;
- unsigned int cpu;
/* If the cpu topology has been already set, just return */
if (cpuid_topo->core_id != -1)
@@ -99,26 +277,9 @@ void store_cpu_topology(unsigned int cpuid)
cpuid_topo->socket_id = -1;
}
- /* update core and thread sibling masks */
- for_each_possible_cpu(cpu) {
- struct cputopo_arm *cpu_topo = &cpu_topology[cpu];
-
- if (cpuid_topo->socket_id == cpu_topo->socket_id) {
- cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
- if (cpu != cpuid)
- cpumask_set_cpu(cpu,
- &cpuid_topo->core_sibling);
-
- if (cpuid_topo->core_id == cpu_topo->core_id) {
- cpumask_set_cpu(cpuid,
- &cpu_topo->thread_sibling);
- if (cpu != cpuid)
- cpumask_set_cpu(cpu,
- &cpuid_topo->thread_sibling);
- }
- }
- }
- smp_wmb();
+ update_siblings_masks(cpuid);
+
+ update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK);
printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
cpuid, cpu_topology[cpuid].thread_id,
@@ -126,6 +287,75 @@ void store_cpu_topology(unsigned int cpuid)
cpu_topology[cpuid].socket_id, mpidr);
}
+
+#ifdef CONFIG_SCHED_HMP
+
+static const char * const little_cores[] = {
+ "arm,cortex-a7",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu = 0;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ if (cpu >= num_possible_cpus())
+ break;
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+
+ cpu++;
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+ * We didn't find both big and little cores so let's call all cores
+ * fast as this will keep the system running, with all cores being
+ * treated equal.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
+
/*
* init_cpu_topology is called at boot when only one cpu is running
* which prevent simultaneous write access to cpu_topology array
@@ -134,7 +364,7 @@ void init_cpu_topology(void)
{
unsigned int cpu;
- /* init core mask */
+ /* init core mask and power*/
for_each_possible_cpu(cpu) {
struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
@@ -143,6 +373,10 @@ void init_cpu_topology(void)
cpu_topo->socket_id = -1;
cpumask_clear(&cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);
+
+ set_power_scale(cpu, SCHED_POWER_SCALE);
}
smp_wmb();
+
+ parse_dt_topology();
}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 4928d89758f..3647170e9a1 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -820,6 +820,8 @@ void __init early_trap_init(void *vectors_base)
*/
memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
sigreturn_codes, sizeof(sigreturn_codes));
+ memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE),
+ syscall_restart_code, sizeof(syscall_restart_code));
flush_icache_range(vectors, vectors + PAGE_SIZE);
modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb0631..36ff15bbfdd 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -183,7 +183,9 @@ SECTIONS
}
#endif
+#ifdef CONFIG_SMP
PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
#ifdef CONFIG_XIP_KERNEL
__data_loc = ALIGN(4); /* location in binary */