author    David S. Miller <davem@sunset.davemloft.net>  2005-03-30 22:56:22 -0800
committer David S. Miller <davem@sunset.davemloft.net>  2005-03-30 22:56:22 -0800
commit    f4b9bfe34026406a19d58626b14f1a86454adf30 (patch)
tree      daf14fde3032b7fc469b023b13a10940ba00a7bc
parent    52525c94dbad620c16d8378812be7e73caaea22d (diff)
[SPARC64]: Put per-cpu area base into register g5.

FINALLY, we can put the per-cpu base into register g5 on SMP.
There are many simplifications and improvements now, but this is
the base implementation.

Thanks to Rusty and the IA64 folks for urging that I pursue this
kind of scheme instead of locking stuff into the TLB at some fixed
virtual address.

Signed-off-by: David S. Miller <davem@davemloft.net>
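In outline, the change works like this (a minimal sketch in C, using only
symbols that appear in the diff below): each CPU's per-cpu offset is
published in __per_cpu_offset[], and the running CPU's own offset is pinned
in %g5 as a global register variable, so a local per-cpu access needs no
smp_processor_id() lookup and no fixed TLB mapping.

	/* One offset per CPU, filled in once the per-cpu areas are set up. */
	extern unsigned long __per_cpu_offset[NR_CPUS];

	/* The current CPU's offset lives permanently in %g5. */
	register unsigned long __local_per_cpu_offset asm("g5");

	/* A local per-cpu access is then just "address + %g5". */
	#define __get_cpu_var(var) \
		(*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))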
-rw-r--r--  arch/sparc64/Makefile           |  4
-rw-r--r--  arch/sparc64/kernel/etrap.S     | 19
-rw-r--r--  arch/sparc64/kernel/head.S      | 42
-rw-r--r--  arch/sparc64/kernel/rtrap.S     |  5
-rw-r--r--  arch/sparc64/kernel/semaphore.c |  6
-rw-r--r--  arch/sparc64/kernel/smp.c       |  9
-rw-r--r--  arch/sparc64/kernel/unaligned.c |  2
-rw-r--r--  arch/sparc64/kernel/winfixup.S  | 21
-rw-r--r--  arch/sparc64/prom/map.S         |  2
-rw-r--r--  include/asm-sparc64/cpudata.h   |  3
-rw-r--r--  include/asm-sparc64/percpu.h    | 41
-rw-r--r--  include/asm-sparc64/pgalloc.h   |  2
-rw-r--r--  include/asm-sparc64/system.h    |  2
-rw-r--r--  include/asm-sparc64/tlb.h       |  2
14 files changed, 124 insertions(+), 36 deletions(-)
diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile
index 61724880f20d3e..43fe382da07892 100644
--- a/arch/sparc64/Makefile
+++ b/arch/sparc64/Makefile
@@ -41,10 +41,10 @@ endif
ifneq ($(NEW_GCC),y)
CFLAGS := $(CFLAGS) -pipe -mno-fpu -mtune=ultrasparc -mmedlow \
- -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare
+ -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
else
CFLAGS := $(CFLAGS) -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow \
- -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare \
+ -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare \
$(CC_UNDECL)
AFLAGS += -m64 -mcpu=ultrasparc $(CC_UNDECL)
endif
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 058c505a322358..52cde3a2623133 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -102,11 +102,15 @@ cplus_etrap_insn_2:
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g3
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g3, 3, %g3
+ ldx [%g2 + %g3], %g5
+#endif
jmpl %l2 + 0x4, %g0
ldx [%g6 + TI_TASK], %g4
- nop
- nop
- nop
3: ldub [%l6 + TI_FPDEPTH], %l5
add %l6, TI_FPSAVED + 1, %l4
@@ -254,10 +258,15 @@ cplus_etrap_insn_4:
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g3
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g3, 3, %g3
+ ldx [%g2 + %g3], %g5
+#endif
ldx [%g6 + TI_TASK], %g4
done
- nop
- nop
#undef TASK_REGOFF
#undef ETRAP_PSTATE1
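The five instructions added to each trap-entry path above compute the
current CPU's entry in __per_cpu_offset[] and drop it into %g5. In C terms
(a sketch; %g6 holds current_thread_info() at that point, and the sllx by 3
is the scaling by sizeof(unsigned long)):

	unsigned int cpu = current_thread_info()->cpu;  /* ldub [%g6 + TI_CPU]  */
	g5 = __per_cpu_offset[cpu];                     /* ldx  [%g2 + cpu * 8] */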
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 0d6f58dad2db7c..954093551597f8 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -89,8 +89,8 @@ sparc_ramdisk_image64:
* PROM entry point is on %o4
*/
sparc64_boot:
- BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_boot)
- BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_boot)
+ BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
ba,pt %xcc, spitfire_boot
nop
@@ -103,11 +103,11 @@ cheetah_boot:
mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
wr %g1, %asr18
- sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
- or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
- sllx %g5, 32, %g5
- or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
- stxa %g5, [%g0] ASI_DCU_CONTROL_REG
+ sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ sllx %g7, 32, %g7
+ or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7
+ stxa %g7, [%g0] ASI_DCU_CONTROL_REG
membar #Sync
cheetah_generic_boot:
@@ -492,7 +492,7 @@ sun4u_init:
stxa %g3, [%g2] ASI_DMMU
membar #Sync
- BRANCH_IF_ANY_CHEETAH(g1,g5,cheetah_tlb_fixup)
+ BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup)
ba,pt %xcc, spitfire_tlb_fixup
nop
@@ -520,8 +520,8 @@ cheetah_tlb_fixup:
mov 1, %g2 /* Set TLB type to cheetah. */
-1: sethi %hi(tlb_type), %g5
- stw %g2, [%g5 + %lo(tlb_type)]
+1: sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
ba,pt %xcc, 2f
@@ -567,8 +567,8 @@ spitfire_tlb_fixup:
/* Set TLB type to spitfire. */
mov 0, %g2
- sethi %hi(tlb_type), %g5
- stw %g2, [%g5 + %lo(tlb_type)]
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
tlb_fixup_done:
sethi %hi(init_thread_union), %g6
@@ -596,12 +596,18 @@ tlb_fixup_done:
#endif
wr %g0, ASI_P, %asi
- mov 1, %g5
- sllx %g5, THREAD_SHIFT, %g5
- sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
- add %g6, %g5, %sp
+ mov 1, %g1
+ sllx %g1, THREAD_SHIFT, %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ add %g6, %g1, %sp
mov 0, %fp
+ /* Set per-cpu pointer initially to zero, this makes
+ * the boot-cpu use the in-kernel-image per-cpu areas
+ * before setup_per_cpu_area() is invoked.
+ */
+ clr %g5
+
wrpr %g0, 0, %wstate
wrpr %g0, 0x0, %tl
@@ -637,8 +643,8 @@ setup_tba: /* i0 = is_starfire */
rdpr %pstate, %o1
mov %g6, %o2
wrpr %o1, (PSTATE_AG|PSTATE_IE), %pstate
- sethi %hi(sparc64_ttable_tl0), %g5
- wrpr %g5, %tba
+ sethi %hi(sparc64_ttable_tl0), %g1
+ wrpr %g1, %tba
mov %o2, %g6
/* Set up MMU globals */
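Clearing %g5 on the boot CPU works because a zero offset makes the per-cpu
accessors in percpu.h (further below) degenerate to the in-kernel-image copy
of each variable, exactly as the comment in the hunk says:

	/* With __local_per_cpu_offset == 0: */
	__get_cpu_var(var) == *RELOC_HIDE(&per_cpu__var, 0)
	                   == per_cpu__var   /* the static in-image copy */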
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index 42e396112cfaa7..e917752080062b 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -222,8 +222,9 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
- ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
- ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
+ brz,a,pn %l3, 1f
+ ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
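The rtrap change makes the %g5 restore conditional. A SPARC conditional
branch with the annul bit set executes its delay slot only when the branch
is taken, so the ldx into %g5 runs only when %l3 is zero. Assuming %l3
still carries the TSTATE_PRIV test made earlier in rtrap.S (an assumption,
not visible in this hunk), the logic in C is roughly:

	if (!(tstate & TSTATE_PRIV))            /* %l3 == 0: returning to user */
		g5 = regs->u_regs[UREG_G5];     /* hand the user back its %g5  */
	/* else keep the live per-cpu base in %g5 across a kernel trap return */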
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
index 8094808d5ba5ce..63496c43fe1736 100644
--- a/arch/sparc64/kernel/semaphore.c
+++ b/arch/sparc64/kernel/semaphore.c
@@ -83,7 +83,7 @@ void up(struct semaphore *sem)
" restore\n"
" .previous\n"
: : "r" (sem), "i" (__up)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
}
static void __sched __down(struct semaphore * sem)
@@ -140,7 +140,7 @@ void __sched down(struct semaphore *sem)
" restore\n"
" .previous\n"
: : "r" (sem), "i" (__down)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
}
int down_trylock(struct semaphore *sem)
@@ -246,6 +246,6 @@ int __sched down_interruptible(struct semaphore *sem)
" .previous\n"
: "=r" (ret)
: "0" (ret), "r" (sem), "i" (__down_interruptible)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
return ret;
}
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1cbc02aa27dd4c..6550d981b450cd 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -107,6 +107,10 @@ void __init smp_callin(void)
__flush_tlb_all();
+ __asm__ __volatile__("mov %0, %%g5\n\t"
+ : /* no outputs */
+ : "r" (__per_cpu_offset[cpuid]));
+
smp_setup_percpu_timer();
local_irq_enable();
@@ -1115,6 +1119,11 @@ void __devinit smp_prepare_boot_cpu(void)
}
current_thread_info()->cpu = hard_smp_processor_id();
+
+ __asm__ __volatile__("mov %0, %%g5\n\t"
+ : /* no outputs */
+ : "r" (__per_cpu_offset[smp_processor_id()]));
+
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
}
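smp_callin() and smp_prepare_boot_cpu() seed %g5 once __per_cpu_offset[] is
valid for that CPU. For context, the generic setup_per_cpu_area() named in
the commit message populates the array roughly as follows — a from-memory
sketch of code outside this patch, not something the diff adds:

	char *ptr = alloc_bootmem(size * NR_CPUS);
	unsigned int i;

	for (i = 0; i < NR_CPUS; i++, ptr += size) {
		__per_cpu_offset[i] = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}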
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
index 82a29dae65ee78..4372bf32ecf6f2 100644
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -413,7 +413,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u
:
: "r" (regs), "r" (insn)
: "o0", "o1", "o2", "o3", "o4", "o5", "o7",
- "g1", "g2", "g3", "g4", "g5", "g7", "cc");
+ "g1", "g2", "g3", "g4", "g7", "cc");
} else {
unsigned long addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f));
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index 74a06bef748b68..ca9891a8dad820 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -93,6 +93,13 @@ fill_fixup:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6
ldx [%g6 + TI_TASK], %g4
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
/* This is the same as below, except we handle this a bit special
* since we must preserve %l5 and %l6, see comment above.
@@ -213,6 +220,13 @@ fill_fixup_mna:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
call mem_address_unaligned
add %sp, PTREGS_OFF, %o0
@@ -318,6 +332,13 @@ fill_fixup_dax:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
call data_access_exception
add %sp, PTREGS_OFF, %o0
diff --git a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S
index 509f7b4abef1e5..21b3f9c99ea778 100644
--- a/arch/sparc64/prom/map.S
+++ b/arch/sparc64/prom/map.S
@@ -32,6 +32,7 @@ prom_remap: /* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */
ldx [%g2 + 0x08], %l0 ! prom_cif_handler
mov %g6, %i3
mov %g4, %i4
+ mov %g5, %i5
flushw
sethi %hi(prom_remap - call_method), %g7
@@ -62,6 +63,7 @@ prom_remap: /* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */
/* Restore hard-coded globals. */
mov %i3, %g6
mov %i4, %g4
+ mov %i5, %g5
/* Wheee.... we are done. */
ret
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index b1f1f4e4bd4c68..cc7198aaac505e 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -25,6 +25,7 @@ typedef struct {
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
-#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
#endif /* _SPARC64_CPUDATA_H */
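The two accessors now differ in cost: cpu_data(cpu) goes through
__per_cpu_offset[] and can name any CPU, while the new local_cpu_data()
expands to __get_cpu_var(__cpu_data) and resolves through %g5 in a single
load. A hypothetical call site (illustrative only):

	cpuinfo_sparc *c = &local_cpu_data();   /* this CPU, via %g5      */
	cpuinfo_sparc *o = &cpu_data(cpu);      /* any CPU, via the array */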
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index 8571d6d1a9dfe4..80d66d31b62d6c 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -1,6 +1,45 @@
#ifndef __ARCH_SPARC64_PERCPU__
#define __ARCH_SPARC64_PERCPU__
-#include <asm-generic/percpu.h>
+#include <linux/compiler.h>
+
+#define __GENERIC_PER_CPU
+#ifdef CONFIG_SMP
+
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* Separate out the type, so (int[3], foo) works. */
+#define DEFINE_PER_CPU(type, name) \
+ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
+
+register unsigned long __local_per_cpu_offset asm("g5");
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
+
+/* A macro to avoid #include hell... */
+#define percpu_modcopy(pcpudst, src, size) \
+do { \
+ unsigned int __i; \
+ for (__i = 0; __i < NR_CPUS; __i++) \
+ if (cpu_possible(__i)) \
+ memcpy((pcpudst)+__per_cpu_offset[__i], \
+ (src), (size)); \
+} while (0)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name) \
+ __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
+#define __get_cpu_var(var) per_cpu__##var
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
#endif /* __ARCH_SPARC64_PERCPU__ */
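A short usage sketch of the API this header now provides (the variable name
is illustrative, not from the patch):

	DEFINE_PER_CPU(unsigned long, hypothetical_counter);

	/* Fast local access: compiles to a %g5-relative address. */
	__get_cpu_var(hypothetical_counter)++;

	/* Cross-CPU access: indexes __per_cpu_offset[] explicitly. */
	unsigned long v = per_cpu(hypothetical_counter, cpu);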
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index 88f9c142947cff..2c28e1f605b765 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -14,7 +14,7 @@
/* Page table allocation/freeing. */
#ifdef CONFIG_SMP
/* Sliiiicck */
-#define pgt_quicklists cpu_data(smp_processor_id())
+#define pgt_quicklists local_cpu_data()
#else
extern struct pgtable_cache_struct {
unsigned long *pgd_cache;
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index 3d1af3fc10a633..fd12ca386f4860 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -215,7 +215,7 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
"i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_FLAGS), "i" (TI_CWP), \
"i" (_TIF_NEWCHILD), "i" (TI_TASK) \
: "cc", \
- "g1", "g2", "g3", "g5", "g7", \
+ "g1", "g2", "g3", "g7", \
"l2", "l3", "l4", "l5", "l6", "l7", \
"i0", "i1", "i2", "i3", "i4", "i5", \
"o0", "o1", "o2", "o3", "o4", "o5", "o7" EXTRA_CLOBBER);\
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 6bad3f20f6cb57..9baf57db01d203 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -44,7 +44,7 @@ extern void flush_tlb_pending(void);
static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *mp = &per_cpu(mmu_gathers, smp_processor_id());
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
BUG_ON(mp->tlb_nr);