aboutsummaryrefslogtreecommitdiff
path: root/arch/arc/include
diff options
context:
space:
mode:
authorVineet Gupta <vgupta@synopsys.com>2013-01-18 15:12:23 +0530
committerVineet Gupta <vgupta@synopsys.com>2013-02-15 23:16:02 +0530
commit41195d236e84458bebd4fdc218610a92231ac791 (patch)
treec0049630c1a21a071c9c942086041029ebdf2866 /arch/arc/include
parent0ef88a54aa341f754707414500158addbf35c780 (diff)
ARC: SMP support
ARC common code to enable a SMP system + ISS provided SMP extensions. ARC700 natively lacks SMP support, hence some of the core features are are only enabled if SoCs have the necessary h/w pixie-dust. This includes: -Inter Processor Interrupts (IPI) -Cache coherency -load-locked/store-conditional ... The low level exception handling would be completely broken in SMP because we don't have hardware assisted stack switching. Thus a fair bit of this code is repurposing the MMU_SCRATCH reg for event handler prologues to keep them re-entrant. Many thanks to Rajeshwar Ranga for his initial "major" contributions to SMP Port (back in 2008), and to Noam Camus and Gilad Ben-Yossef for help with resurrecting that in 3.2 kernel (2012). Note that this platform code is again singleton design pattern - so multiple SMP platforms won't build at the moment - this deficiency is addressed in subsequent patches within this series. Signed-off-by: Vineet Gupta <vgupta@synopsys.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Rajeshwar Ranga <rajeshwar.ranga@gmail.com> Cc: Noam Camus <noamc@ezchip.com> Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Diffstat (limited to 'arch/arc/include')
-rw-r--r--arch/arc/include/asm/entry.h49
-rw-r--r--arch/arc/include/asm/mmu_context.h4
-rw-r--r--arch/arc/include/asm/mutex.h9
-rw-r--r--arch/arc/include/asm/pgtable.h4
-rw-r--r--arch/arc/include/asm/processor.h8
-rw-r--r--arch/arc/include/asm/smp.h107
6 files changed, 181 insertions, 0 deletions
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 23ef2de1e09..23daa326fc9 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -389,11 +389,19 @@
* to be saved again on kernel mode stack, as part of ptregs.
*-------------------------------------------------------------*/
.macro EXCPN_PROLOG_FREEUP_REG reg
+#ifdef CONFIG_SMP
+ sr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
st \reg, [@ex_saved_reg1]
+#endif
.endm
.macro EXCPN_PROLOG_RESTORE_REG reg
+#ifdef CONFIG_SMP
+ lr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
ld \reg, [@ex_saved_reg1]
+#endif
.endm
/*--------------------------------------------------------------
@@ -508,7 +516,11 @@
/* restore original r9 , saved in int1_saved_reg
* It will be saved on stack in macro: SAVE_CALLER_SAVED
*/
+#ifdef CONFIG_SMP
+ lr r9, [ARC_REG_SCRATCH_DATA0]
+#else
ld r9, [@int1_saved_reg]
+#endif
/* now we are ready to save the remaining context :) */
st orig_r8_IS_IRQ1, [sp, 8] /* Event Type */
@@ -639,6 +651,41 @@
bmsk \reg, \reg, 7
.endm
+#ifdef CONFIG_SMP
+
+/*-------------------------------------------------
+ * Retrieve the current running task on this CPU
+ * 1. Determine curr CPU id.
+ * 2. Use it to index into _current_task[ ]
+ */
+.macro GET_CURR_TASK_ON_CPU reg
+ GET_CPU_ID \reg
+ ld.as \reg, [@_current_task, \reg]
+.endm
+
+/*-------------------------------------------------
+ * Save a new task as the "current" task on this CPU
+ * 1. Determine curr CPU id.
+ * 2. Use it to index into _current_task[ ]
+ *
+ * Coded differently than GET_CURR_TASK_ON_CPU (which uses LD.AS)
+ * because ST r0, [r1, offset] can ONLY have s9 @offset
+ * while LD can take s9 (4 byte insn) or LIMM (8 byte insn)
+ */
+
+.macro SET_CURR_TASK_ON_CPU tsk, tmp
+ GET_CPU_ID \tmp
+ add2 \tmp, @_current_task, \tmp
+ st \tsk, [\tmp]
+#ifdef CONFIG_ARC_CURR_IN_REG
+ mov r25, \tsk
+#endif
+
+.endm
+
+
+#else /* Uniprocessor implementation of macros */
+
.macro GET_CURR_TASK_ON_CPU reg
ld \reg, [@_current_task]
.endm
@@ -650,6 +697,8 @@
#endif
.endm
+#endif /* SMP / UNI */
+
/* ------------------------------------------------------------------
* Get the ptr to some field of Current Task at @off in task struct
* -Uses r25 for Current task ptr if that is enabled
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index d12f3dec8b7..0d71fb11b57 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -147,8 +147,10 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
+#ifndef CONFIG_SMP
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
+#endif
/*
* Get a new ASID if task doesn't have a valid one. Possible when
@@ -197,7 +199,9 @@ static inline void destroy_context(struct mm_struct *mm)
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
+#ifndef CONFIG_SMP
write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
+#endif
/* Unconditionally get a new ASID */
get_new_mmu_context(next);
diff --git a/arch/arc/include/asm/mutex.h b/arch/arc/include/asm/mutex.h
index 3be5e64da13..a2f88ff9f50 100644
--- a/arch/arc/include/asm/mutex.h
+++ b/arch/arc/include/asm/mutex.h
@@ -6,4 +6,13 @@
* published by the Free Software Foundation.
*/
+/*
+ * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to
+ * atomic dec based which can "count" any number of lock contenders.
+ * This ideally needs to be fixed in core, but for now switching to dec ver.
+ */
+#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2)
+#include <asm-generic/mutex-dec.h>
+#else
#include <asm-generic/mutex-xchg.h>
+#endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index dcb0701528a..b7e36684c09 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -354,11 +354,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* Thus use this macro only when you are certain that "current" is current
* e.g. when dealing with signal frame setup code etc
*/
+#ifndef CONFIG_SMP
#define pgd_offset_fast(mm, addr) \
({ \
pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \
pgd_base + pgd_index(addr); \
})
+#else
+#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr)
+#endif
extern void paging_init(void);
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index b7b15561006..5f26b2c1cba 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -58,7 +58,15 @@ unsigned long thread_saved_pc(struct task_struct *t);
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
+/*
+ * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
+ * get optimised away by gcc
+ */
+#ifdef CONFIG_SMP
+#define cpu_relax() __asm__ __volatile__ ("" : : : "memory")
+#else
#define cpu_relax() do { } while (0)
+#endif
#define copy_segments(tsk, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 4341f3ba7d9..f91f1946272 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -9,6 +9,69 @@
#ifndef __ASM_ARC_SMP_H
#define __ASM_ARC_SMP_H
+#ifdef CONFIG_SMP
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+/* including cpumask.h leads to cyclic deps hence this Forward declaration */
+struct cpumask;
+
+/*
+ * APIs provided by arch SMP code to generic code
+ */
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+/*
+ * APIs provided by arch SMP code to rest of arch code
+ */
+extern void __init smp_init_cpus(void);
+extern void __init first_lines_of_secondary(void);
+
+/*
+ * API expected BY platform smp code (FROM arch smp code)
+ *
+ * smp_ipi_irq_setup:
+ * Takes @cpu and @irq to which the arch-common ISR is hooked up
+ */
+extern int smp_ipi_irq_setup(int cpu, int irq);
+
+/*
+ * APIs expected FROM platform smp code
+ *
+ * arc_platform_smp_cpuinfo:
+ * returns a string containing info for /proc/cpuinfo
+ *
+ * arc_platform_smp_init_cpu:
+ * Called from start_kernel_secondary to do any CPU local setup
+ * such as starting a timer, setting up IPI etc
+ *
+ * arc_platform_smp_wait_to_boot:
+ * Called from early bootup code for non-Master CPUs to "park" them
+ *
+ * arc_platform_smp_wakeup_cpu:
+ * Called from __cpu_up (Master CPU) to kick start another one
+ *
+ * arc_platform_ipi_send:
+ * Takes @cpumask to which IPI(s) would be sent.
+ * The actual msg-id/buffer is manager in arch-common code
+ *
+ * arc_platform_ipi_clear:
+ * Takes @cpu which got IPI at @irq to do any IPI clearing
+ */
+extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_init_cpu(void);
+extern void arc_platform_smp_wait_to_boot(int cpu);
+extern void arc_platform_smp_wakeup_cpu(int cpu, unsigned long pc);
+extern void arc_platform_ipi_send(const struct cpumask *callmap);
+extern void arc_platform_ipi_clear(int cpu, int irq);
+
+#endif /* CONFIG_SMP */
+
/*
* ARC700 doesn't support atomic Read-Modify-Write ops.
* Originally Interrupts had to be disabled around code to gaurantee atomicity.
@@ -18,10 +81,52 @@
*
* (1) These insn were introduced only in 4.10 release. So for older released
* support needed.
+ *
+ * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be
+ * gaurantted by the platform (not something which core handles).
+ * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ
+ * disabling for atomicity.
+ *
+ * However exported spinlock API is not usable due to cyclic hdr deps
+ * (even after system.h disintegration upstream)
+ * asm/bitops.h -> linux/spinlock.h -> linux/preempt.h
+ * -> linux/thread_info.h -> linux/bitops.h -> asm/bitops.h
+ *
+ * So the workaround is to use the lowest level arch spinlock API.
+ * The exported spinlock API is smart enough to be NOP for !CONFIG_SMP,
+ * but same is not true for ARCH backend, hence the need for 2 variants
*/
#ifndef CONFIG_ARC_HAS_LLSC
#include <linux/irqflags.h>
+#ifdef CONFIG_SMP
+
+#include <asm/spinlock.h>
+
+extern arch_spinlock_t smp_atomic_ops_lock;
+extern arch_spinlock_t smp_bitops_lock;
+
+#define atomic_ops_lock(flags) do { \
+ local_irq_save(flags); \
+ arch_spin_lock(&smp_atomic_ops_lock); \
+} while (0)
+
+#define atomic_ops_unlock(flags) do { \
+ arch_spin_unlock(&smp_atomic_ops_lock); \
+ local_irq_restore(flags); \
+} while (0)
+
+#define bitops_lock(flags) do { \
+ local_irq_save(flags); \
+ arch_spin_lock(&smp_bitops_lock); \
+} while (0)
+
+#define bitops_unlock(flags) do { \
+ arch_spin_unlock(&smp_bitops_lock); \
+ local_irq_restore(flags); \
+} while (0)
+
+#else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags)
@@ -29,6 +134,8 @@
#define bitops_lock(flags) local_irq_save(flags)
#define bitops_unlock(flags) local_irq_restore(flags)
+#endif /* !CONFIG_SMP */
+
#endif /* !CONFIG_ARC_HAS_LLSC */
#endif