From a55ee1ff751f88252207160087d8197bb7538d4c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Feb 2013 12:56:18 -0800
Subject: sparc64: Fix gfp_flags setting in tsb_grow().

We should "|= more_flags" rather than "= more_flags".

Reported-by: David Rientjes <rientjes@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/tsb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 7f647434749..428982b9bec 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -314,7 +314,7 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
 retry_tsb_alloc:
 	gfp_flags = GFP_KERNEL;
 	if (new_size > (PAGE_SIZE * 2))
-		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
+		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;
 
 	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
 					gfp_flags, numa_node_id());
-- 
cgit v1.2.3


From bcd896bae0166b4443503482a26ecf84d9ba60ab Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Feb 2013 13:20:08 -0800
Subject: sparc64: Handle hugepage TSB being NULL.

Accomodate the possibility that the TSB might be NULL at
the point that update_mmu_cache() is invoked.  This is
necessary because we will sometimes need to defer the TSB
allocation to the first fault that happens in the 'mm'.

Seperate out the hugepage PTE test into a seperate function
so that the logic is clearer.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init_64.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index c3b72423c84..0d0bc392c35 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -314,16 +314,31 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
 	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
 	unsigned long tag;
 
+	if (unlikely(!tsb))
+		return;
+
 	tsb += ((address >> tsb_hash_shift) &
 		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
 	tag = (address >> 22UL);
 	tsb_insert(tsb, tag, tte);
 }
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static inline bool is_hugetlb_pte(pte_t pte)
+{
+	if ((tlb_type == hypervisor &&
+	     (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
+	    (tlb_type != hypervisor &&
+	     (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U))
+		return true;
+	return false;
+}
+#endif
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
-	unsigned long tsb_index, tsb_hash_shift, flags;
 	struct mm_struct *mm;
+	unsigned long flags;
 	pte_t pte = *ptep;
 
 	if (tlb_type != hypervisor) {
@@ -335,25 +350,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 	mm = vma->vm_mm;
 
-	tsb_index = MM_TSB_BASE;
-	tsb_hash_shift = PAGE_SHIFT;
-
 	spin_lock_irqsave(&mm->context.lock, flags);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
-		if ((tlb_type == hypervisor &&
-		     (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
-		    (tlb_type != hypervisor &&
-		     (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) {
-			tsb_index = MM_TSB_HUGE;
-			tsb_hash_shift = HPAGE_SHIFT;
-		}
-	}
+	if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
+		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+					address, pte_val(pte));
+	else
 #endif
-
-	__update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
-				address, pte_val(pte));
+		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
+					address, pte_val(pte));
 
 	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
-- 
cgit v1.2.3


From 0fbebed682ff2788dee58e8d7f7dda46e33aa10b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Feb 2013 22:34:10 -0800
Subject: sparc64: Fix tsb_grow() in atomic context.

If our first THP installation for an MM is via the set_pmd_at() done
during khugepaged's collapsing we'll end up in tsb_grow() trying to do
a GFP_KERNEL allocation with several locks held.

Simply using GFP_ATOMIC in this situation is not the best option
because we really can't have this fail, so we'd really like to keep
this an order 0 GFP_KERNEL allocation if possible.

Also, doing the TSB allocation from khugepaged is a really bad idea
because we'll allocate it potentially from the wrong NUMA node in that
context.

So what we do is defer the hugepage TSB allocation until the first TLB
miss we take on a hugepage.  This is slightly tricky because we have
to handle two unusual cases:

1) Taking the first hugepage TLB miss in the window trap handler.
   We'll call the winfix_trampoline when that is detected.

2) An initial TSB allocation via TLB miss races with a hugetlb
   fault on another cpu running the same MM.  We handle this by
   unconditionally loading the TSB we see into the current cpu
   even if it's non-NULL at hugetlb_setup time.

Reported-by: Meelis Roos <mroos@ut.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/hugetlb.h |  1 -
 arch/sparc/include/asm/page_64.h |  4 ++--
 arch/sparc/kernel/tsb.S          | 39 +++++++++++++++++++++++++++++++++++----
 arch/sparc/mm/fault_64.c         |  9 +++++++--
 arch/sparc/mm/init_64.c          | 24 +++++++++++++++++++-----
 arch/sparc/mm/tlb.c              | 11 +++++++++--
 6 files changed, 72 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 9661e9bc7bb..7eb57d24504 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -12,7 +12,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
 {
-	hugetlb_setup(mm);
 }
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 4b39f74d6ca..e15538899f3 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -27,8 +27,8 @@
 #ifndef __ASSEMBLY__
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-struct mm_struct;
-extern void hugetlb_setup(struct mm_struct *mm);
+struct pt_regs;
+extern void hugetlb_setup(struct pt_regs *regs);
 #endif
 
 #define WANT_PAGE_VIRTUAL
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index d4bdc7a6237..a313e4a9399 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -136,12 +136,43 @@ tsb_miss_page_table_walk_sun4v_fastpath:
 	 nop
 
 	/* It is a huge page, use huge page TSB entry address we
-	 * calculated above.
+	 * calculated above.  If the huge page TSB has not been
+	 * allocated, setup a trap stack and call hugetlb_setup()
+	 * to do so, then return from the trap to replay the TLB
+	 * miss.
+	 *
+	 * This is necessary to handle the case of transparent huge
+	 * pages where we don't really have a non-atomic context
+	 * in which to allocate the hugepage TSB hash table.  When
+	 * the 'mm' faults in the hugepage for the first time, we
+	 * thus handle it here.  This also makes sure that we can
+	 * allocate the TSB hash table on the correct NUMA node.
 	 */
 	TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
-	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2
-	cmp		%g2, -1
-	movne		%xcc, %g2, %g1
+	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1
+	cmp		%g1, -1
+	bne,pt		%xcc, 60f
+	 nop
+
+661:	rdpr		%pstate, %g5
+	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	SET_GL(1)
+	nop
+	.previous
+
+	rdpr	%tl, %g3
+	cmp	%g3, 1
+	bne,pn	%xcc, winfix_trampoline
+	 nop
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	hugetlb_setup
+	 add	%sp, PTREGS_OFF, %o0
+	ba,pt	%xcc, rtrap
+	 nop
+
 60:
 #endif
 
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 097aee763af..5062ff389e8 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -472,8 +472,13 @@ good_area:
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.huge_pte_count;
 	if (unlikely(mm_rss >
-		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
-		tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
+		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
+			tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+		else
+			hugetlb_setup(regs);
+
+	}
 #endif
 	return;
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 0d0bc392c35..82bbf048a5b 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2718,14 +2718,28 @@ static void context_reload(void *__data)
 		load_secondary_context(mm);
 }
 
-void hugetlb_setup(struct mm_struct *mm)
+void hugetlb_setup(struct pt_regs *regs)
 {
-	struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+	struct mm_struct *mm = current->mm;
+	struct tsb_config *tp;
 
-	if (likely(tp->tsb != NULL))
-		return;
+	if (in_atomic() || !mm) {
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+		pr_alert("Unexpected HugeTLB setup in atomic context.\n");
+		die_if_kernel("HugeTSB in atomic", regs);
+	}
+
+	tp = &mm->context.tsb_block[MM_TSB_HUGE];
+	if (likely(tp->tsb == NULL))
+		tsb_grow(mm, MM_TSB_HUGE, 0);
 
-	tsb_grow(mm, MM_TSB_HUGE, 0);
 	tsb_context_switch(mm);
 	smp_tsb_sync(mm);
 
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 3e8fec391fe..ba6ae7ffdc2 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -135,8 +135,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			mm->context.huge_pte_count++;
 		else
 			mm->context.huge_pte_count--;
-		if (mm->context.huge_pte_count == 1)
-			hugetlb_setup(mm);
+
+		/* Do not try to allocate the TSB hash table if we
+		 * don't have one already.  We have various locks held
+		 * and thus we'll end up doing a GFP_KERNEL allocation
+		 * in an atomic context.
+		 *
+		 * Instead, we let the first TLB miss on a hugepage
+		 * take care of this.
+		 */
 	}
 
 	if (!pmd_none(orig)) {
-- 
cgit v1.2.3


From 76968ad2eac6456270353de168b21f04f4b3d1d3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 20 Feb 2013 12:38:40 -0800
Subject: sparc64: Fix huge PMD to PTE translation for sun4u in TLB miss
 handler.

When we set the sun4u version of the PTE execute bit, it's:

	or	REG, _PAGE_EXEC_4U, REG

_PAGE_EXEC_4U is 0x1000, unfortunately the immedate field of the
'or' instruction is a signed 13-bit value.  So the above actually
assembles into:

	or	REG, -4096, REG

completely corrupting the final PTE value.

Set it with a:

	sethi	%hi(_PAGE_EXEC_4U), TMP
	or	REG, TMP, REG

sequence instead.

This fixes "git gc" crashes on sun4u machines.

Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/tsb.h | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index b4c258de444..e696432b950 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -157,17 +157,26 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	andn		REG2, 0x7, REG2; \
 	add		REG1, REG2, REG1;
 
-	/* This macro exists only to make the PMD translator below easier
-	 * to read.  It hides the ELF section switch for the sun4v code
-	 * patching.
+	/* These macros exists only to make the PMD translator below
+	 * easier to read.  It hides the ELF section switch for the
+	 * sun4v code patching.
 	 */
-#define OR_PTE_BIT(REG, NAME)				\
+#define OR_PTE_BIT_1INSN(REG, NAME)			\
 661:	or		REG, _PAGE_##NAME##_4U, REG;	\
 	.section	.sun4v_1insn_patch, "ax";	\
 	.word		661b;				\
 	or		REG, _PAGE_##NAME##_4V, REG;	\
 	.previous;
 
+#define OR_PTE_BIT_2INSN(REG, TMP, NAME)		\
+661:	sethi		%hi(_PAGE_##NAME##_4U), TMP;	\
+	or		REG, TMP, REG;			\
+	.section	.sun4v_2insn_patch, "ax";	\
+	.word		661b;				\
+	mov		-1, TMP;			\
+	or		REG, _PAGE_##NAME##_4V, REG;	\
+	.previous;
+
 	/* Load into REG the PTE value for VALID, CACHE, and SZHUGE.  */
 #define BUILD_PTE_VALID_SZHUGE_CACHE(REG)				   \
 661:	sethi		%uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG;		   \
@@ -214,12 +223,13 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 andn		REG1, PMD_HUGE_PROTBITS, REG2;			      \
 	sllx		REG2, PMD_PADDR_SHIFT, REG2;			      \
 	/* REG2 now holds PFN << PAGE_SHIFT */				      \
-	andcc		REG1, PMD_HUGE_EXEC, %g0;			      \
-	bne,a,pt	%xcc, 1f;					      \
-	 OR_PTE_BIT(REG2, EXEC);					      \
-1:	andcc		REG1, PMD_HUGE_WRITE, %g0;			      \
+	andcc		REG1, PMD_HUGE_WRITE, %g0;			      \
 	bne,a,pt	%xcc, 1f;					      \
-	 OR_PTE_BIT(REG2, W);						      \
+	 OR_PTE_BIT_1INSN(REG2, W);					      \
+1:	andcc		REG1, PMD_HUGE_EXEC, %g0;			      \
+	be,pt		%xcc, 1f;					      \
+	 nop;								      \
+	OR_PTE_BIT_2INSN(REG2, REG1, EXEC);				      \
 	/* REG1 can now be clobbered, build final PTE */		      \
 1:	BUILD_PTE_VALID_SZHUGE_CACHE(REG1);				      \
 	ba,pt		%xcc, PTE_LABEL;				      \
-- 
cgit v1.2.3


From f9fd3488f6a3c2c5cc8613e4fd7fbbaa57f6bf8f Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 15 Feb 2013 15:52:06 +0100
Subject: sparc32: refactor smp boot

Introduce a common smp_callin() function to call
from trampoline_32.S.
Add platform specific functions to handle the
platform details.

This is in preparation for a patch that will
unify the smp boot stuff for all architectures.
sparc32 was significantly different to warrant
this patch in preparation.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/kernel.h        | 12 ++++++
 arch/sparc/kernel/leon_smp.c      | 33 ++++-----------
 arch/sparc/kernel/smp_32.c        | 86 +++++++++++++++++++++++++++++++++++++++
 arch/sparc/kernel/sun4d_smp.c     | 29 ++++---------
 arch/sparc/kernel/sun4m_smp.c     | 33 ++++-----------
 arch/sparc/kernel/trampoline_32.S | 17 ++++----
 6 files changed, 132 insertions(+), 78 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 291bb5de9ce..a702d9ab019 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -48,6 +48,10 @@ extern void sun4m_init_IRQ(void);
 extern void sun4m_unmask_profile_irq(void);
 extern void sun4m_clear_profile_irq(int cpu);
 
+/* sun4m_smp.c */
+void sun4m_cpu_pre_starting(void *arg);
+void sun4m_cpu_pre_online(void *arg);
+
 /* sun4d_irq.c */
 extern spinlock_t sun4d_imsk_lock;
 
@@ -60,6 +64,14 @@ extern int show_sun4d_interrupts(struct seq_file *, void *);
 extern void sun4d_distribute_irqs(void);
 extern void sun4d_free_irq(unsigned int irq, void *dev_id);
 
+/* sun4d_smp.c */
+void sun4d_cpu_pre_starting(void *arg);
+void sun4d_cpu_pre_online(void *arg);
+
+/* leon_smp.c */
+void leon_cpu_pre_starting(void *arg);
+void leon_cpu_pre_online(void *arg);
+
 /* head_32.S */
 extern unsigned int t_nmi[];
 extern unsigned int linux_trap_ipi15_sun4d[];
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 0f3fb6d9c8e..9b40c9c12a0 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -69,31 +69,19 @@ static inline unsigned long do_swap(volatile unsigned long *ptr,
 	return val;
 }
 
-void __cpuinit leon_callin(void)
+void __cpuinit leon_cpu_pre_starting(void *arg)
 {
-	int cpuid = hard_smp_processor_id();
-
-	local_ops->cache_all();
-	local_ops->tlb_all();
 	leon_configure_cache_smp();
+}
 
-	notify_cpu_starting(cpuid);
-
-	/* Get our local ticker going. */
-	register_percpu_ce(cpuid);
-
-	calibrate_delay();
-	smp_store_cpu_info(cpuid);
-
-	local_ops->cache_all();
-	local_ops->tlb_all();
+void __cpuinit leon_cpu_pre_online(void *arg)
+{
+	int cpuid = hard_smp_processor_id();
 
-	/*
-	 * Unblock the master CPU _only_ when the scheduler state
-	 * of all secondary CPUs will be up-to-date, so after
-	 * the SMP initialization the master will be just allowed
-	 * to call the scheduler code.
-	 * Allow master to continue.
+	/* Allow master to continue. The master will then give us the
+	 * go-ahead by setting the smp_commenced_mask and will wait without
+	 * timeouts until our setup is completed fully (signified by
+	 * our bit being set in the cpu_online_mask).
 	 */
 	do_swap(&cpu_callin_map[cpuid], 1);
 
@@ -110,9 +98,6 @@ void __cpuinit leon_callin(void)
 
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		mb();
-
-	local_irq_enable();
-	set_cpu_online(cpuid, true);
 }
 
 /*
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 79db45e5134..9e7e6d71836 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/cache.h>
 #include <linux/delay.h>
+#include <linux/cpu.h>
 
 #include <asm/ptrace.h>
 #include <linux/atomic.h>
@@ -32,8 +33,10 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/cpudata.h>
+#include <asm/timer.h>
 #include <asm/leon.h>
 
+#include "kernel.h"
 #include "irq.h"
 
 volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
@@ -294,6 +297,89 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	return ret;
 }
 
+void __cpuinit arch_cpu_pre_starting(void *arg)
+{
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		sun4m_cpu_pre_starting(arg);
+		break;
+	case sun4d:
+		sun4d_cpu_pre_starting(arg);
+		break;
+	case sparc_leon:
+		leon_cpu_pre_starting(arg);
+		break;
+	default:
+		BUG();
+	}
+}
+
+void __cpuinit arch_cpu_pre_online(void *arg)
+{
+	unsigned int cpuid = hard_smp_processor_id();
+
+	register_percpu_ce(cpuid);
+
+	calibrate_delay();
+	smp_store_cpu_info(cpuid);
+
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		sun4m_cpu_pre_online(arg);
+		break;
+	case sun4d:
+		sun4d_cpu_pre_online(arg);
+		break;
+	case sparc_leon:
+		leon_cpu_pre_online(arg);
+		break;
+	default:
+		BUG();
+	}
+}
+
+void __cpuinit sparc_start_secondary(void *arg)
+{
+	unsigned int cpu;
+
+	/*
+	 * SMP booting is extremely fragile in some architectures. So run
+	 * the cpu initialization code first before anything else.
+	 */
+	arch_cpu_pre_starting(arg);
+
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	/* Invoke the CPU_STARTING notifier callbacks */
+	notify_cpu_starting(cpu);
+
+	arch_cpu_pre_online(arg);
+
+	/* Set the CPU in the cpu_online_mask */
+	set_cpu_online(cpu, true);
+
+	/* Enable local interrupts now */
+	local_irq_enable();
+
+	wmb();
+	cpu_idle();
+
+	/* We should never reach here! */
+	BUG();
+}
+
+void __cpuinit smp_callin(void)
+{
+	sparc_start_secondary(NULL);
+}
+
 void smp_bogo(struct seq_file *m)
 {
 	int i;
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index ddaea31de58..c9eb82f23d9 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -50,10 +50,9 @@ static inline void show_leds(int cpuid)
 			      "i" (ASI_M_CTL));
 }
 
-void __cpuinit smp4d_callin(void)
+void __cpuinit sun4d_cpu_pre_starting(void *arg)
 {
 	int cpuid = hard_smp_processor_id();
-	unsigned long flags;
 
 	/* Show we are alive */
 	cpu_leds[cpuid] = 0x6;
@@ -61,26 +60,20 @@ void __cpuinit smp4d_callin(void)
 
 	/* Enable level15 interrupt, disable level14 interrupt for now */
 	cc_set_imsk((cc_get_imsk() & ~0x8000) | 0x4000);
+}
 
-	local_ops->cache_all();
-	local_ops->tlb_all();
+void __cpuinit sun4d_cpu_pre_online(void *arg)
+{
+	unsigned long flags;
+	int cpuid;
 
-	notify_cpu_starting(cpuid);
-	/*
-	 * Unblock the master CPU _only_ when the scheduler state
+	cpuid = hard_smp_processor_id();
+
+	/* Unblock the master CPU _only_ when the scheduler state
 	 * of all secondary CPUs will be up-to-date, so after
 	 * the SMP initialization the master will be just allowed
 	 * to call the scheduler code.
 	 */
-	/* Get our local ticker going. */
-	register_percpu_ce(cpuid);
-
-	calibrate_delay();
-	smp_store_cpu_info(cpuid);
-	local_ops->cache_all();
-	local_ops->tlb_all();
-
-	/* Allow master to continue. */
 	sun4d_swap((unsigned long *)&cpu_callin_map[cpuid], 1);
 	local_ops->cache_all();
 	local_ops->tlb_all();
@@ -106,16 +99,12 @@ void __cpuinit smp4d_callin(void)
 	local_ops->cache_all();
 	local_ops->tlb_all();
 
-	local_irq_enable();	/* We don't allow PIL 14 yet */
-
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		barrier();
 
 	spin_lock_irqsave(&sun4d_imsk_lock, flags);
 	cc_set_imsk(cc_get_imsk() & ~0x4000); /* Allow PIL 14 as well */
 	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
-	set_cpu_online(cpuid, true);
-
 }
 
 /*
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 128af730428..8a65f158153 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -34,30 +34,19 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
 	return val;
 }
 
-void __cpuinit smp4m_callin(void)
+void __cpuinit sun4m_cpu_pre_starting(void *arg)
 {
-	int cpuid = hard_smp_processor_id();
-
-	local_ops->cache_all();
-	local_ops->tlb_all();
-
-	notify_cpu_starting(cpuid);
-
-	register_percpu_ce(cpuid);
-
-	calibrate_delay();
-	smp_store_cpu_info(cpuid);
+}
 
-	local_ops->cache_all();
-	local_ops->tlb_all();
+void __cpuinit sun4m_cpu_pre_online(void *arg)
+{
+	int cpuid = hard_smp_processor_id();
 
-	/*
-	 * Unblock the master CPU _only_ when the scheduler state
-	 * of all secondary CPUs will be up-to-date, so after
-	 * the SMP initialization the master will be just allowed
-	 * to call the scheduler code.
+	/* Allow master to continue. The master will then give us the
+	 * go-ahead by setting the smp_commenced_mask and will wait without
+	 * timeouts until our setup is completed fully (signified by
+	 * our bit being set in the cpu_online_mask).
 	 */
-	/* Allow master to continue. */
 	swap_ulong(&cpu_callin_map[cpuid], 1);
 
 	/* XXX: What's up with all the flushes? */
@@ -75,10 +64,6 @@ void __cpuinit smp4m_callin(void)
 
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		mb();
-
-	local_irq_enable();
-
-	set_cpu_online(cpuid, true);
 }
 
 /*
diff --git a/arch/sparc/kernel/trampoline_32.S b/arch/sparc/kernel/trampoline_32.S
index af27acab448..6cdb08cdabf 100644
--- a/arch/sparc/kernel/trampoline_32.S
+++ b/arch/sparc/kernel/trampoline_32.S
@@ -79,18 +79,15 @@ cpu3_startup:
 	 nop
 
 	/* Start this processor. */
-	call	smp4m_callin
+	call	smp_callin
 	 nop
 
-	b,a	smp_do_cpu_idle
+	b,a	smp_panic
 
 	.text
 	.align	4
 
-smp_do_cpu_idle:
-	call	cpu_idle
-	 mov	0, %o0
-
+smp_panic:
 	call	cpu_panic
 	 nop
 
@@ -144,10 +141,10 @@ sun4d_cpu_startup:
 	 nop
 
 	/* Start this processor. */
-	call	smp4d_callin
+	call	smp_callin
 	 nop
 
-	b,a	smp_do_cpu_idle
+	b,a	smp_panic
 
 	__CPUINIT
 	.align	4
@@ -201,7 +198,7 @@ leon_smp_cpu_startup:
 	 nop
 
 	/* Start this processor. */
-	call	leon_callin
+	call	smp_callin
 	 nop
 
-	b,a	smp_do_cpu_idle
+	b,a	smp_panic
-- 
cgit v1.2.3