From c6454e58ad522c2a1d971e8807a365cba1e64033 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 31 Mar 2005 22:35:01 -0800
Subject: [SPARC64]: Store per-cpu pointer in IMMU TSB register.

This trick requires that we PAGE_SIZE align the per-cpu areas, but
that is fine.  Also, reduce the __per_cpu_offset[] array into just
two values, a base and a shift.

Signed-off-by: David S. Miller
---
 arch/sparc64/kernel/etrap.S       | 14 +++------
 arch/sparc64/kernel/head.S        |  2 ++
 arch/sparc64/kernel/rtrap.S       |  6 ++--
 arch/sparc64/kernel/smp.c         | 60 ++++++++++++++++++++++++++++++++++-----
 arch/sparc64/kernel/vmlinux.lds.S |  2 +-
 arch/sparc64/kernel/winfixup.S    | 21 ++++----------
 include/asm-sparc64/percpu.h      | 12 +++++---
 7 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 52cde3a2623133..50d2af1d98aeeb 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -103,11 +103,8 @@ cplus_etrap_insn_2:
 		wrpr	%g0, ETRAP_PSTATE2, %pstate
 		mov	%l6, %g6
 #ifdef CONFIG_SMP
-		ldub	[%g6 + TI_CPU], %g3
-		sethi	%hi(__per_cpu_offset), %g2
-		or	%g2, %lo(__per_cpu_offset), %g2
-		sllx	%g3, 3, %g3
-		ldx	[%g2 + %g3], %g5
+		mov	TSB_REG, %g3
+		ldxa	[%g3] ASI_IMMU, %g5
 #endif
 		jmpl	%l2 + 0x4, %g0
 		 ldx	[%g6 + TI_TASK], %g4
@@ -259,11 +256,8 @@ cplus_etrap_insn_4:
 		mov	%l6, %g6
 		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
 #ifdef CONFIG_SMP
-		ldub	[%g6 + TI_CPU], %g3
-		sethi	%hi(__per_cpu_offset), %g2
-		or	%g2, %lo(__per_cpu_offset), %g2
-		sllx	%g3, 3, %g3
-		ldx	[%g2 + %g3], %g5
+		mov	TSB_REG, %g3
+		ldxa	[%g3] ASI_IMMU, %g5
 #endif
 		ldx	[%g6 + TI_TASK], %g4
 		done
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 954093551597f8..8104a56ca2d8e7 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -657,6 +657,8 @@ setup_tba:	/* i0 = is_starfire */
 	mov	TSB_REG, %g1
 	stxa	%g0, [%g1] ASI_DMMU
 	membar	#Sync
+	stxa	%g0, [%g1] ASI_IMMU
+	membar	#Sync
 	mov	TLB_SFSR, %g1
 	sethi	%uhi(KERN_HIGHBITS), %g2
 	or	%g2, %ulo(KERN_HIGHBITS), %g2
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index e917752080062b..0696ed4b9d644f 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -222,8 +222,10 @@ rt_continue:	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
 		ldx	[%sp + PTREGS_OFF + PT_V9_G3], %g3
 		ldx	[%sp + PTREGS_OFF + PT_V9_G4], %g4
-		brz,a,pn	%l3, 1f
-		 ldx	[%sp + PTREGS_OFF + PT_V9_G5], %g5
+		ldx	[%sp + PTREGS_OFF + PT_V9_G5], %g5
+		mov	TSB_REG, %g6
+		brnz,a,pn	%l3, 1f
+		 ldxa	[%g6] ASI_IMMU, %g5
 1:
 		ldx	[%sp + PTREGS_OFF + PT_V9_G6], %g6
 		ldx	[%sp + PTREGS_OFF + PT_V9_G7], %g7
 		wrpr	%g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6550d981b450cd..57f7f0f45d0977 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -3,6 +3,7 @@
  * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include
 #include
 #include
 #include
@@ -20,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -99,6 +101,16 @@ static volatile unsigned long callin_flag = 0;
 
 extern void inherit_locked_prom_mappings(int save_p);
 
+static inline void cpu_setup_percpu_base(unsigned long cpu_id)
+{
+	__asm__ __volatile__("mov %0, %%g5\n\t"
+			     "stxa %0, [%1] %2\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (__per_cpu_offset(cpu_id)),
+			       "r" (TSB_REG), "i" (ASI_IMMU));
+}
+
 void __init smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
@@ -107,9 +119,7 @@ void __init smp_callin(void)
 
 	__flush_tlb_all();
 
-	__asm__ __volatile__("mov %0, %%g5\n\t"
-			     : /* no outputs */
-			     : "r" (__per_cpu_offset[cpuid]));
+	cpu_setup_percpu_base(cpuid);
 
 	smp_setup_percpu_timer();
 
@@ -1120,10 +1130,6 @@ void __devinit smp_prepare_boot_cpu(void)
 	current_thread_info()->cpu = hard_smp_processor_id();
 
-	__asm__ __volatile__("mov %0, %%g5\n\t"
-			     : /* no outputs */
-			     : "r" (__per_cpu_offset[smp_processor_id()]));
-
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), phys_cpu_present_map);
 }
 
@@ -1184,3 +1190,43 @@ void smp_send_stop(void)
 {
 }
 
+unsigned long __per_cpu_base;
+unsigned long __per_cpu_shift;
+
+EXPORT_SYMBOL(__per_cpu_base);
+EXPORT_SYMBOL(__per_cpu_shift);
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long goal, size, i;
+	char *ptr;
+
+	/* Created by linker magic */
+	extern char __per_cpu_start[], __per_cpu_end[];
+
+	/* Copy section for each CPU (we discard the original) */
+	goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
+#ifdef CONFIG_MODULES
+	if (goal < PERCPU_ENOUGH_ROOM)
+		goal = PERCPU_ENOUGH_ROOM;
+#endif
+
+	__per_cpu_shift = 0;
+	for (size = 1UL; size < goal; size <<= 1UL)
+		__per_cpu_shift++;
+
+	ptr = alloc_bootmem_pages(size * NR_CPUS);
+	__per_cpu_base = ptr - __per_cpu_start;
+
+	for (i = 0; i < NR_CPUS; i++, ptr += size)
+		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+	/* Finally, load in the boot cpu's base value.
+	 * We abuse the IMMU TSB register for trap handler
+	 * entry and exit loading of %g5.  That is why it
+	 * has to be page aligned.
+	 */
+	BUG_ON((__per_cpu_shift < PAGE_SHIFT) ||
+	       (__per_cpu_base & ~PAGE_MASK));
+	cpu_setup_percpu_base(hard_smp_processor_id());
+}
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index a710d38d1a91f8..382fd6798bb958 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -72,7 +72,7 @@ SECTIONS
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }
   __initramfs_end = .;
-  . = ALIGN(32);
+  . = ALIGN(8192);
   __per_cpu_start = .;
   .data.percpu : { *(.data.percpu) }
   __per_cpu_end = .;
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index ca9891a8dad820..dfbc7e0dcf70f7 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -94,11 +94,8 @@ fill_fixup:
 	mov	%o7, %g6
 	ldx	[%g6 + TI_TASK], %g4
 #ifdef CONFIG_SMP
-	ldub	[%g6 + TI_CPU], %g1
-	sethi	%hi(__per_cpu_offset), %g2
-	or	%g2, %lo(__per_cpu_offset), %g2
-	sllx	%g1, 3, %g1
-	ldx	[%g2 + %g1], %g5
+	mov	TSB_REG, %g1
+	ldxa	[%g1] ASI_IMMU, %g5
 #endif
 
 	/* This is the same as below, except we handle this a bit special
@@ -221,11 +218,8 @@ fill_fixup_mna:
 	mov	%o7, %g6			! Get current back.
 	ldx	[%g6 + TI_TASK], %g4		! Finish it.
 #ifdef CONFIG_SMP
-	ldub	[%g6 + TI_CPU], %g1
-	sethi	%hi(__per_cpu_offset), %g2
-	or	%g2, %lo(__per_cpu_offset), %g2
-	sllx	%g1, 3, %g1
-	ldx	[%g2 + %g1], %g5
+	mov	TSB_REG, %g1
+	ldxa	[%g1] ASI_IMMU, %g5
 #endif
 	call	mem_address_unaligned
 	 add	%sp, PTREGS_OFF, %o0
@@ -333,11 +327,8 @@ fill_fixup_dax:
 	mov	%o7, %g6			! Get current back.
 	ldx	[%g6 + TI_TASK], %g4		! Finish it.
 #ifdef CONFIG_SMP
-	ldub	[%g6 + TI_CPU], %g1
-	sethi	%hi(__per_cpu_offset), %g2
-	or	%g2, %lo(__per_cpu_offset), %g2
-	sllx	%g1, 3, %g1
-	ldx	[%g2 + %g1], %g5
+	mov	TSB_REG, %g1
+	ldxa	[%g1] ASI_IMMU, %g5
 #endif
 	call	data_access_exception
 	 add	%sp, PTREGS_OFF, %o0
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index 80d66d31b62d6c..aea4e51e7cd135 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -3,10 +3,14 @@
 
 #include
 
-#define __GENERIC_PER_CPU
 
 #ifdef CONFIG_SMP
-extern unsigned long __per_cpu_offset[NR_CPUS];
+extern void setup_per_cpu_areas(void);
+
+extern unsigned long __per_cpu_base;
+extern unsigned long __per_cpu_shift;
+#define __per_cpu_offset(__cpu) \
+	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
 
 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
@@ -15,7 +19,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 register unsigned long __local_per_cpu_offset asm("g5");
 
 /* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
 
 /* A macro to avoid #include hell... */
@@ -24,7 +28,7 @@ do {						\
 	unsigned int __i;			\
 	for (__i = 0; __i < NR_CPUS; __i++)	\
 		if (cpu_possible(__i))		\
-			memcpy((pcpudst)+__per_cpu_offset[__i],	\
+			memcpy((pcpudst)+__per_cpu_offset(__i),	\
 			       (src), (size));	\
 } while (0)
 #else /* ! SMP */
-- 
cgit 1.2.3-korg
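
A note on the scheme, appended after the signature delimiter so the patch
itself is untouched: instead of indexing an NR_CPUS-sized
__per_cpu_offset[] table (the five-instruction ldub/sethi/or/sllx/ldx
sequence in every trap path), each CPU's per-cpu copy is rounded up to a
power-of-two size, so its offset is simply base + (cpu << shift); and
because the base is PAGE_SIZE aligned, it can be parked in the
otherwise-abused IMMU TSB register and refetched with a two-instruction
mov/ldxa pair.  The standalone C sketch below walks through the same
arithmetic in userspace; NR_CPUS, PAGE_SIZE, the pretend section size,
and posix_memalign (standing in for alloc_bootmem_pages) are demo
assumptions, not part of the patch.

/* Demo of the base+shift per-cpu addressing scheme; userspace only. */
#define _POSIX_C_SOURCE 200112L
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS		4		/* demo value */
#define PAGE_SIZE	8192UL		/* sparc64 base page size */
#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

static unsigned long __per_cpu_base;
static unsigned long __per_cpu_shift;

/* The whole offset table collapses to one shift and one add. */
#define per_cpu_offset(cpu) \
	(__per_cpu_base + ((unsigned long)(cpu) << __per_cpu_shift))

int main(void)
{
	unsigned long section = 1234;	/* pretend .data.percpu size */
	unsigned long goal = ALIGN(section, PAGE_SIZE);
	unsigned long size;
	void *area;
	int cpu;

	/* Round each CPU's copy up to a power of two so that
	 * "cpu * size" becomes "cpu << shift", mirroring
	 * setup_per_cpu_areas() in the patch.
	 */
	__per_cpu_shift = 0;
	for (size = 1UL; size < goal; size <<= 1UL)
		__per_cpu_shift++;

	/* A page-aligned base has zero low bits; that is the property
	 * the patch's BUG_ON() verifies, and what makes the value safe
	 * to keep in a register whose low bits the hardware defines.
	 */
	if (posix_memalign(&area, PAGE_SIZE, size * NR_CPUS))
		return 1;
	__per_cpu_base = (unsigned long)area;
	assert((__per_cpu_base & (PAGE_SIZE - 1)) == 0);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> base + %#lx\n", cpu,
		       per_cpu_offset(cpu) - __per_cpu_base);

	free(area);
	return 0;
}

With section = 1234 the loop settles on shift = 13 (one 8192-byte copy
per CPU), and the printed offsets step by 0x2000, matching the layout the
kernel builds with alloc_bootmem_pages(size * NR_CPUS).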