diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2005-03-30 22:56:22 -0800 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-03-30 22:56:22 -0800 |
commit | f4b9bfe34026406a19d58626b14f1a86454adf30 (patch) | |
tree | daf14fde3032b7fc469b023b13a10940ba00a7bc | |
parent | 52525c94dbad620c16d8378812be7e73caaea22d (diff) | |
download | history-f4b9bfe34026406a19d58626b14f1a86454adf30.tar.gz |
[SPARC64]: Put per-cpu area base into register g5.
FINALLY, we can put the per-cpu base into register
g5 on SMP. Many simplifications and improvements are possible
now, but this is the base implementation.
Thanks to Rusty and the IA64 folks for urging that I pursue
this kind of scheme instead of locking stuff into the TLB
at some fixed virtual address.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc64/Makefile | 4 | ||||
-rw-r--r-- | arch/sparc64/kernel/etrap.S | 19 | ||||
-rw-r--r-- | arch/sparc64/kernel/head.S | 42 | ||||
-rw-r--r-- | arch/sparc64/kernel/rtrap.S | 5 | ||||
-rw-r--r-- | arch/sparc64/kernel/semaphore.c | 6 | ||||
-rw-r--r-- | arch/sparc64/kernel/smp.c | 9 | ||||
-rw-r--r-- | arch/sparc64/kernel/unaligned.c | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/winfixup.S | 21 | ||||
-rw-r--r-- | arch/sparc64/prom/map.S | 2 | ||||
-rw-r--r-- | include/asm-sparc64/cpudata.h | 3 | ||||
-rw-r--r-- | include/asm-sparc64/percpu.h | 41 | ||||
-rw-r--r-- | include/asm-sparc64/pgalloc.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/system.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/tlb.h | 2 |
14 files changed, 124 insertions, 36 deletions
diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile index 61724880f20d3e..43fe382da07892 100644 --- a/arch/sparc64/Makefile +++ b/arch/sparc64/Makefile @@ -41,10 +41,10 @@ endif ifneq ($(NEW_GCC),y) CFLAGS := $(CFLAGS) -pipe -mno-fpu -mtune=ultrasparc -mmedlow \ - -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare + -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare else CFLAGS := $(CFLAGS) -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow \ - -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare \ + -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare \ $(CC_UNDECL) AFLAGS += -m64 -mcpu=ultrasparc $(CC_UNDECL) endif diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 058c505a322358..52cde3a2623133 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -102,11 +102,15 @@ cplus_etrap_insn_2: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 +#ifdef CONFIG_SMP + ldub [%g6 + TI_CPU], %g3 + sethi %hi(__per_cpu_offset), %g2 + or %g2, %lo(__per_cpu_offset), %g2 + sllx %g3, 3, %g3 + ldx [%g2 + %g3], %g5 +#endif jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 - nop - nop - nop 3: ldub [%l6 + TI_FPDEPTH], %l5 add %l6, TI_FPSAVED + 1, %l4 @@ -254,10 +258,15 @@ cplus_etrap_insn_4: stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] +#ifdef CONFIG_SMP + ldub [%g6 + TI_CPU], %g3 + sethi %hi(__per_cpu_offset), %g2 + or %g2, %lo(__per_cpu_offset), %g2 + sllx %g3, 3, %g3 + ldx [%g2 + %g3], %g5 +#endif ldx [%g6 + TI_TASK], %g4 done - nop - nop #undef TASK_REGOFF #undef ETRAP_PSTATE1 diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 0d6f58dad2db7c..954093551597f8 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -89,8 +89,8 @@ sparc_ramdisk_image64: * PROM entry point is on %o4 */ sparc64_boot: - BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_boot) - 
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_boot) + BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) ba,pt %xcc, spitfire_boot nop @@ -103,11 +103,11 @@ cheetah_boot: mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1 wr %g1, %asr18 - sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 - or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 - sllx %g5, 32, %g5 - or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 - stxa %g5, [%g0] ASI_DCU_CONTROL_REG + sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7 + or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7 + sllx %g7, 32, %g7 + or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7 + stxa %g7, [%g0] ASI_DCU_CONTROL_REG membar #Sync cheetah_generic_boot: @@ -492,7 +492,7 @@ sun4u_init: stxa %g3, [%g2] ASI_DMMU membar #Sync - BRANCH_IF_ANY_CHEETAH(g1,g5,cheetah_tlb_fixup) + BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup) ba,pt %xcc, spitfire_tlb_fixup nop @@ -520,8 +520,8 @@ cheetah_tlb_fixup: mov 1, %g2 /* Set TLB type to cheetah. */ -1: sethi %hi(tlb_type), %g5 - stw %g2, [%g5 + %lo(tlb_type)] +1: sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) ba,pt %xcc, 2f @@ -567,8 +567,8 @@ spitfire_tlb_fixup: /* Set TLB type to spitfire. */ mov 0, %g2 - sethi %hi(tlb_type), %g5 - stw %g2, [%g5 + %lo(tlb_type)] + sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] tlb_fixup_done: sethi %hi(init_thread_union), %g6 @@ -596,12 +596,18 @@ tlb_fixup_done: #endif wr %g0, ASI_P, %asi - mov 1, %g5 - sllx %g5, THREAD_SHIFT, %g5 - sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 - add %g6, %g5, %sp + mov 1, %g1 + sllx %g1, THREAD_SHIFT, %g1 + sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 + add %g6, %g1, %sp mov 0, %fp + /* Set per-cpu pointer initially to zero, this makes + * the boot-cpu use the in-kernel-image per-cpu areas + * before setup_per_cpu_area() is invoked. 
+ */ + clr %g5 + wrpr %g0, 0, %wstate wrpr %g0, 0x0, %tl @@ -637,8 +643,8 @@ setup_tba: /* i0 = is_starfire */ rdpr %pstate, %o1 mov %g6, %o2 wrpr %o1, (PSTATE_AG|PSTATE_IE), %pstate - sethi %hi(sparc64_ttable_tl0), %g5 - wrpr %g5, %tba + sethi %hi(sparc64_ttable_tl0), %g1 + wrpr %g1, %tba mov %o2, %g6 /* Set up MMU globals */ diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 42e396112cfaa7..e917752080062b 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -222,8 +222,9 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 - ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 - ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 + brz,a,pn %l3, 1f + ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 +1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 8094808d5ba5ce..63496c43fe1736 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -83,7 +83,7 @@ void up(struct semaphore *sem) " restore\n" " .previous\n" : : "r" (sem), "i" (__up) - : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g7", "memory", "cc"); } static void __sched __down(struct semaphore * sem) @@ -140,7 +140,7 @@ void __sched down(struct semaphore *sem) " restore\n" " .previous\n" : : "r" (sem), "i" (__down) - : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g7", "memory", "cc"); } int down_trylock(struct semaphore *sem) @@ -246,6 +246,6 @@ int __sched down_interruptible(struct semaphore *sem) " .previous\n" : "=r" (ret) : "0" (ret), "r" (sem), "i" (__down_interruptible) - : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g7", "memory", "cc"); return ret; } diff --git a/arch/sparc64/kernel/smp.c 
b/arch/sparc64/kernel/smp.c index 1cbc02aa27dd4c..6550d981b450cd 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -107,6 +107,10 @@ void __init smp_callin(void) __flush_tlb_all(); + __asm__ __volatile__("mov %0, %%g5\n\t" + : /* no outputs */ + : "r" (__per_cpu_offset[cpuid])); + smp_setup_percpu_timer(); local_irq_enable(); @@ -1115,6 +1119,11 @@ void __devinit smp_prepare_boot_cpu(void) } current_thread_info()->cpu = hard_smp_processor_id(); + + __asm__ __volatile__("mov %0, %%g5\n\t" + : /* no outputs */ + : "r" (__per_cpu_offset[smp_processor_id()])); + cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), phys_cpu_present_map); } diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 82a29dae65ee78..4372bf32ecf6f2 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -413,7 +413,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u : : "r" (regs), "r" (insn) : "o0", "o1", "o2", "o3", "o4", "o5", "o7", - "g1", "g2", "g3", "g4", "g5", "g7", "cc"); + "g1", "g2", "g3", "g4", "g7", "cc"); } else { unsigned long addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f)); diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 74a06bef748b68..ca9891a8dad820 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -93,6 +93,13 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 +#ifdef CONFIG_SMP + ldub [%g6 + TI_CPU], %g1 + sethi %hi(__per_cpu_offset), %g2 + or %g2, %lo(__per_cpu_offset), %g2 + sllx %g1, 3, %g1 + ldx [%g2 + %g1], %g5 +#endif /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -213,6 +220,13 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! 
Finish it. +#ifdef CONFIG_SMP + ldub [%g6 + TI_CPU], %g1 + sethi %hi(__per_cpu_offset), %g2 + or %g2, %lo(__per_cpu_offset), %g2 + sllx %g1, 3, %g1 + ldx [%g2 + %g1], %g5 +#endif call mem_address_unaligned add %sp, PTREGS_OFF, %o0 @@ -318,6 +332,13 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. +#ifdef CONFIG_SMP + ldub [%g6 + TI_CPU], %g1 + sethi %hi(__per_cpu_offset), %g2 + or %g2, %lo(__per_cpu_offset), %g2 + sllx %g1, 3, %g1 + ldx [%g2 + %g1], %g5 +#endif call data_access_exception add %sp, PTREGS_OFF, %o0 diff --git a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S index 509f7b4abef1e5..21b3f9c99ea778 100644 --- a/arch/sparc64/prom/map.S +++ b/arch/sparc64/prom/map.S @@ -32,6 +32,7 @@ prom_remap: /* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */ ldx [%g2 + 0x08], %l0 ! prom_cif_handler mov %g6, %i3 mov %g4, %i4 + mov %g5, %i5 flushw sethi %hi(prom_remap - call_method), %g7 @@ -62,6 +63,7 @@ prom_remap: /* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */ /* Restore hard-coded globals. */ mov %i3, %g6 mov %i4, %g4 + mov %i5, %g5 /* Wheee.... we are done. 
*/ ret diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index b1f1f4e4bd4c68..cc7198aaac505e 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -25,6 +25,7 @@ typedef struct { } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); -#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) +#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) +#define local_cpu_data() __get_cpu_var(__cpu_data) #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 8571d6d1a9dfe4..80d66d31b62d6c 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -1,6 +1,45 @@ #ifndef __ARCH_SPARC64_PERCPU__ #define __ARCH_SPARC64_PERCPU__ -#include <asm-generic/percpu.h> +#include <linux/compiler.h> + +#define __GENERIC_PER_CPU +#ifdef CONFIG_SMP + +extern unsigned long __per_cpu_offset[NR_CPUS]; + +/* Separate out the type, so (int[3], foo) works. */ +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name + +register unsigned long __local_per_cpu_offset asm("g5"); + +/* var is in discarded region: offset to particular copy we want */ +#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu])) +#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) + +/* A macro to avoid #include hell... */ +#define percpu_modcopy(pcpudst, src, size) \ +do { \ + unsigned int __i; \ + for (__i = 0; __i < NR_CPUS; __i++) \ + if (cpu_possible(__i)) \ + memcpy((pcpudst)+__per_cpu_offset[__i], \ + (src), (size)); \ +} while (0) +#else /* ! 
SMP */ + +#define DEFINE_PER_CPU(type, name) \ + __typeof__(type) per_cpu__##name + +#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define __get_cpu_var(var) per_cpu__##var + +#endif /* SMP */ + +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) #endif /* __ARCH_SPARC64_PERCPU__ */ diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 88f9c142947cff..2c28e1f605b765 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -14,7 +14,7 @@ /* Page table allocation/freeing. */ #ifdef CONFIG_SMP /* Sliiiicck */ -#define pgt_quicklists cpu_data(smp_processor_id()) +#define pgt_quicklists local_cpu_data() #else extern struct pgtable_cache_struct { unsigned long *pgd_cache; diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index 3d1af3fc10a633..fd12ca386f4860 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -215,7 +215,7 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_FLAGS), "i" (TI_CWP), \ "i" (_TIF_NEWCHILD), "i" (TI_TASK) \ : "cc", \ - "g1", "g2", "g3", "g5", "g7", \ + "g1", "g2", "g3", "g7", \ "l2", "l3", "l4", "l5", "l6", "l7", \ "i0", "i1", "i2", "i3", "i4", "i5", \ "o0", "o1", "o2", "o3", "o4", "o5", "o7" EXTRA_CLOBBER);\ diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h index 6bad3f20f6cb57..9baf57db01d203 100644 --- a/include/asm-sparc64/tlb.h +++ b/include/asm-sparc64/tlb.h @@ -44,7 +44,7 @@ extern void flush_tlb_pending(void); static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { - struct mmu_gather *mp = &per_cpu(mmu_gathers, smp_processor_id()); + struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); BUG_ON(mp->tlb_nr); |