author     David S. Miller <davem@nuts.davemloft.net>    2005-03-31 22:35:01 -0800
committer  David S. Miller <davem@sunset.davemloft.net>  2005-03-31 22:35:01 -0800
commit     c6454e58ad522c2a1d971e8807a365cba1e64033 (patch)
tree       3d5870397b8d9b631ad8ce2c62d53ffc4872a4d6
parent     f4b9bfe34026406a19d58626b14f1a86454adf30 (diff)
[SPARC64]: Store per-cpu pointer in IMMU TSB register.
This trick requires that we PAGE_SIZE align the per-cpu areas, but that is fine.  Also, reduce the __per_cpu_offset[] array into just two values, a base and a shift.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  arch/sparc64/kernel/etrap.S        | 14
-rw-r--r--  arch/sparc64/kernel/head.S         |  2
-rw-r--r--  arch/sparc64/kernel/rtrap.S        |  6
-rw-r--r--  arch/sparc64/kernel/smp.c          | 60
-rw-r--r--  arch/sparc64/kernel/vmlinux.lds.S  |  2
-rw-r--r--  arch/sparc64/kernel/winfixup.S     | 21
-rw-r--r--  include/asm-sparc64/percpu.h       | 12
7 files changed, 78 insertions(+), 39 deletions(-)
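
The patch replaces the per-CPU offset table with a single base and a power-of-two shift, so a CPU's per-cpu pointer is just base + (cpu << shift). Before the per-file diffs, here is a minimal standalone C sketch of that computation. Only the names __per_cpu_base, __per_cpu_shift and the offset formula mirror the patch; the demo scaffolding (NR_CPUS_DEMO, the 8K stride, the fake bootmem base, main) is purely illustrative.

/*
 * Sketch of the base + shift per-cpu offset scheme described in the
 * commit message.  Demo values only; in the kernel, __per_cpu_base and
 * __per_cpu_shift are set up in setup_per_cpu_areas().
 */
#include <stdio.h>

#define NR_CPUS_DEMO	4
#define SHIFT_DEMO	13	/* e.g. one 8K page per CPU copy */

static unsigned long __per_cpu_base;	/* start of the replicated per-cpu area */
static unsigned long __per_cpu_shift;	/* log2 of the per-CPU stride */

/* Equivalent of the new __per_cpu_offset(cpu) macro in percpu.h. */
static unsigned long per_cpu_offset(unsigned long cpu)
{
	return __per_cpu_base + (cpu << __per_cpu_shift);
}

int main(void)
{
	unsigned long cpu;

	__per_cpu_base = 0x100000UL;	/* pretend bootmem handed us this address */
	__per_cpu_shift = SHIFT_DEMO;

	/* Each CPU's copy starts one stride (1 << shift) after the previous one. */
	for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
		printf("cpu %lu -> offset 0x%lx\n", cpu, per_cpu_offset(cpu));

	return 0;
}

Because the base is page aligned and the shift is at least PAGE_SHIFT, the boot path can stash each CPU's offset in the (otherwise unused) IMMU TSB register and reload it into %g5 on trap entry and exit, which is what the assembly changes below do.
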
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 52cde3a2623133..50d2af1d98aeeb 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -103,11 +103,8 @@ cplus_etrap_insn_2:
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
#ifdef CONFIG_SMP
- ldub [%g6 + TI_CPU], %g3
- sethi %hi(__per_cpu_offset), %g2
- or %g2, %lo(__per_cpu_offset), %g2
- sllx %g3, 3, %g3
- ldx [%g2 + %g3], %g5
+ mov TSB_REG, %g3
+ ldxa [%g3] ASI_IMMU, %g5
#endif
jmpl %l2 + 0x4, %g0
ldx [%g6 + TI_TASK], %g4
@@ -259,11 +256,8 @@ cplus_etrap_insn_4:
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
#ifdef CONFIG_SMP
- ldub [%g6 + TI_CPU], %g3
- sethi %hi(__per_cpu_offset), %g2
- or %g2, %lo(__per_cpu_offset), %g2
- sllx %g3, 3, %g3
- ldx [%g2 + %g3], %g5
+ mov TSB_REG, %g3
+ ldxa [%g3] ASI_IMMU, %g5
#endif
ldx [%g6 + TI_TASK], %g4
done
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 954093551597f8..8104a56ca2d8e7 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -657,6 +657,8 @@ setup_tba: /* i0 = is_starfire */
mov TSB_REG, %g1
stxa %g0, [%g1] ASI_DMMU
membar #Sync
+ stxa %g0, [%g1] ASI_IMMU
+ membar #Sync
mov TLB_SFSR, %g1
sethi %uhi(KERN_HIGHBITS), %g2
or %g2, %ulo(KERN_HIGHBITS), %g2
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index e917752080062b..0696ed4b9d644f 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -222,8 +222,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
- brz,a,pn %l3, 1f
- ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+ ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+ mov TSB_REG, %g6
+ brnz,a,pn %l3, 1f
+ ldxa [%g6] ASI_IMMU, %g5
1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6550d981b450cd..57f7f0f45d0977 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -3,6 +3,7 @@
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*/
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -20,6 +21,7 @@
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
+#include <linux/bootmem.h>
#include <asm/head.h>
#include <asm/ptrace.h>
@@ -99,6 +101,16 @@ static volatile unsigned long callin_flag = 0;
extern void inherit_locked_prom_mappings(int save_p);
+static inline void cpu_setup_percpu_base(unsigned long cpu_id)
+{
+ __asm__ __volatile__("mov %0, %%g5\n\t"
+ "stxa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (__per_cpu_offset(cpu_id)),
+ "r" (TSB_REG), "i" (ASI_IMMU));
+}
+
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
@@ -107,9 +119,7 @@ void __init smp_callin(void)
__flush_tlb_all();
- __asm__ __volatile__("mov %0, %%g5\n\t"
- : /* no outputs */
- : "r" (__per_cpu_offset[cpuid]));
+ cpu_setup_percpu_base(cpuid);
smp_setup_percpu_timer();
@@ -1120,10 +1130,6 @@ void __devinit smp_prepare_boot_cpu(void)
current_thread_info()->cpu = hard_smp_processor_id();
- __asm__ __volatile__("mov %0, %%g5\n\t"
- : /* no outputs */
- : "r" (__per_cpu_offset[smp_processor_id()]));
-
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
}
@@ -1184,3 +1190,43 @@ void smp_send_stop(void)
{
}
+unsigned long __per_cpu_base;
+unsigned long __per_cpu_shift;
+
+EXPORT_SYMBOL(__per_cpu_base);
+EXPORT_SYMBOL(__per_cpu_shift);
+
+void __init setup_per_cpu_areas(void)
+{
+ unsigned long goal, size, i;
+ char *ptr;
+ /* Created by linker magic */
+ extern char __per_cpu_start[], __per_cpu_end[];
+
+ /* Copy section for each CPU (we discard the original) */
+ goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
+
+#ifdef CONFIG_MODULES
+ if (goal < PERCPU_ENOUGH_ROOM)
+ goal = PERCPU_ENOUGH_ROOM;
+#endif
+ __per_cpu_shift = 0;
+ for (size = 1UL; size < goal; size <<= 1UL)
+ __per_cpu_shift++;
+
+ ptr = alloc_bootmem_pages(size * NR_CPUS);
+
+ __per_cpu_base = ptr - __per_cpu_start;
+
+ for (i = 0; i < NR_CPUS; i++, ptr += size)
+ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+ /* Finally, load in the boot cpu's base value.
+ * We abuse the IMMU TSB register for trap handler
+ * entry and exit loading of %g5. That is why it
+ * has to be page aligned.
+ */
+ BUG_ON((__per_cpu_shift < PAGE_SHIFT) ||
+ (__per_cpu_base & ~PAGE_MASK));
+ cpu_setup_percpu_base(hard_smp_processor_id());
+}
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index a710d38d1a91f8..382fd6798bb958 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -72,7 +72,7 @@ SECTIONS
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
- . = ALIGN(32);
+ . = ALIGN(8192);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index ca9891a8dad820..dfbc7e0dcf70f7 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -94,11 +94,8 @@ fill_fixup:
mov %o7, %g6
ldx [%g6 + TI_TASK], %g4
#ifdef CONFIG_SMP
- ldub [%g6 + TI_CPU], %g1
- sethi %hi(__per_cpu_offset), %g2
- or %g2, %lo(__per_cpu_offset), %g2
- sllx %g1, 3, %g1
- ldx [%g2 + %g1], %g5
+ mov TSB_REG, %g1
+ ldxa [%g1] ASI_IMMU, %g5
#endif
/* This is the same as below, except we handle this a bit special
@@ -221,11 +218,8 @@ fill_fixup_mna:
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
#ifdef CONFIG_SMP
- ldub [%g6 + TI_CPU], %g1
- sethi %hi(__per_cpu_offset), %g2
- or %g2, %lo(__per_cpu_offset), %g2
- sllx %g1, 3, %g1
- ldx [%g2 + %g1], %g5
+ mov TSB_REG, %g1
+ ldxa [%g1] ASI_IMMU, %g5
#endif
call mem_address_unaligned
add %sp, PTREGS_OFF, %o0
@@ -333,11 +327,8 @@ fill_fixup_dax:
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
#ifdef CONFIG_SMP
- ldub [%g6 + TI_CPU], %g1
- sethi %hi(__per_cpu_offset), %g2
- or %g2, %lo(__per_cpu_offset), %g2
- sllx %g1, 3, %g1
- ldx [%g2 + %g1], %g5
+ mov TSB_REG, %g1
+ ldxa [%g1] ASI_IMMU, %g5
#endif
call data_access_exception
add %sp, PTREGS_OFF, %o0
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index 80d66d31b62d6c..aea4e51e7cd135 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -3,10 +3,14 @@
#include <linux/compiler.h>
-#define __GENERIC_PER_CPU
#ifdef CONFIG_SMP
-extern unsigned long __per_cpu_offset[NR_CPUS];
+extern void setup_per_cpu_areas(void);
+
+extern unsigned long __per_cpu_base;
+extern unsigned long __per_cpu_shift;
+#define __per_cpu_offset(__cpu) \
+ (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
/* Separate out the type, so (int[3], foo) works. */
#define DEFINE_PER_CPU(type, name) \
@@ -15,7 +19,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
register unsigned long __local_per_cpu_offset asm("g5");
/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
/* A macro to avoid #include hell... */
@@ -24,7 +28,7 @@ do { \
unsigned int __i; \
for (__i = 0; __i < NR_CPUS; __i++) \
if (cpu_possible(__i)) \
- memcpy((pcpudst)+__per_cpu_offset[__i], \
+ memcpy((pcpudst)+__per_cpu_offset(__i), \
(src), (size)); \
} while (0)
#else /* ! SMP */