From: Paul Mackerras

Rewrite/cleanup of the SLB management code.  This removes nearly all of
the SLB-related code from arch/ppc64/kernel/stab.c and puts a rewritten
version in arch/ppc64/mm, where it better belongs.  The main SLB miss
path is in assembler and the other routines have been cleaned up and
streamlined.  Notable changes:

- Ugly bitfields are no longer used for generating SLB entries.

- slb_allocate() (the main SLB miss routine) is now in assembler, and
  all the data it uses is stored in the PACA.

- The mm context is now copied into the PACA at context switch time, to
  avoid looking up the thread struct on SLB miss.

- An SLB miss will now never (directly) result in a call to
  do_page_fault.  If we get a miss on a totally bogus address the
  handler will now put in an SLB entry referencing VSID 0.  This will
  never have any pages, so we'll get the (fatal) page fault shortly
  afterwards.  This simplifies the SLB entry and exit paths.

- The round-robin pointer in the PACA now references the last-used
  rather than the next-to-use SLB slot, which slightly simplifies the
  asm for updating it.

- do_slb_bolted is unified with the general SLB miss path.  There is
  now one SLB miss handler, in assembler, called with only the
  low-level exception prolog (EXCEPTION_PROLOG_[PI]SERIES rather than
  EXCEPTION_PROLOG_COMMON) and minimal extra save/restore logic.

- The exception entry/exit path of the SLB miss handler is streamlined
  to shave a few cycles off.  The most significant change is that the
  RI bit is left off throughout the whole handler, which avoids an
  extra mtmsrd to turn it back off on the exit path.

Signed-off-by: David Gibson
Signed-off-by: Paul Mackerras
Signed-off-by: Andrew Morton
---

 25-akpm/arch/ppc64/kernel/asm-offsets.c |    9 -
 25-akpm/arch/ppc64/kernel/head.S        |  249 ++++++++--------------------
 25-akpm/arch/ppc64/kernel/pacaData.c    |    1 
 25-akpm/arch/ppc64/kernel/smp.c         |    2 
 25-akpm/arch/ppc64/kernel/stab.c        |  277 --------------------------------
 25-akpm/arch/ppc64/mm/Makefile          |    2 
 25-akpm/arch/ppc64/mm/fault.c           |    6 
 25-akpm/arch/ppc64/mm/slb.c             |  136 +++++++++++++++
 25-akpm/arch/ppc64/mm/slb_low.S         |  168 +++++++++++++++++++
 25-akpm/include/asm-ppc64/mmu.h         |   59 ++----
 25-akpm/include/asm-ppc64/mmu_context.h |    8 
 25-akpm/include/asm-ppc64/paca.h        |   10 -
 25-akpm/include/asm-ppc64/page.h        |    5 
 13 files changed, 439 insertions(+), 493 deletions(-)

diff -puN arch/ppc64/kernel/asm-offsets.c~ppc64-improve-slb-reload arch/ppc64/kernel/asm-offsets.c
--- 25/arch/ppc64/kernel/asm-offsets.c~ppc64-improve-slb-reload	2004-08-01 23:00:03.008075640 -0700
+++ 25-akpm/arch/ppc64/kernel/asm-offsets.c	2004-08-01 23:00:03.028072600 -0700
@@ -86,10 +86,17 @@ int main(void)
 	DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
 	DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
 	DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
-	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_next_rr));
+	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
 	DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
 	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
 	DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
+	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
+	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
+	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+	DEFINE(PACASLBR3, offsetof(struct paca_struct, slb_r3));
+#ifdef CONFIG_HUGETLB_PAGE
+	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
+#endif /* CONFIG_HUGETLB_PAGE */
DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); DEFINE(PACAPROFENABLED, offsetof(struct paca_struct, prof_enabled)); DEFINE(PACAPROFLEN, offsetof(struct paca_struct, prof_len)); diff -puN arch/ppc64/kernel/head.S~ppc64-improve-slb-reload arch/ppc64/kernel/head.S --- 25/arch/ppc64/kernel/head.S~ppc64-improve-slb-reload 2004-08-01 23:00:03.010075336 -0700 +++ 25-akpm/arch/ppc64/kernel/head.S 2004-08-01 23:00:03.031072144 -0700 @@ -200,6 +200,7 @@ exception_marker: #define EX_R13 32 #define EX_SRR0 40 #define EX_DAR 48 +#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 @@ -433,18 +434,52 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl DataAccessSLB_Pseries DataAccessSLB_Pseries: mtspr SPRG1,r13 - mtspr SPRG2,r12 - mfspr r13,DAR - mfcr r12 - srdi r13,r13,60 - cmpdi r13,0xc - beq .do_slb_bolted_Pseries - mtcrf 0x80,r12 - mfspr r12,SPRG2 - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, DataAccessSLB_common) + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACASLBR3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + clrrdi r12,r13,32 /* get high part of &label */ + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,(.do_slb_miss)@l + ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + mfspr r3,DAR + rfid STD_EXCEPTION_PSERIES(0x400, InstructionAccess) - STD_EXCEPTION_PSERIES(0x480, InstructionAccessSLB) + + . = 0x480 + .globl InstructionAccessSLB_Pseries +InstructionAccessSLB_Pseries: + mtspr SPRG1,r13 + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACASLBR3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + clrrdi r12,r13,32 /* get high part of &label */ + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,(.do_slb_miss)@l + ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + mr r3,r11 /* SRR0 is faulting address */ + rfid + STD_EXCEPTION_PSERIES(0x500, HardwareInterrupt) STD_EXCEPTION_PSERIES(0x600, Alignment) STD_EXCEPTION_PSERIES(0x700, ProgramCheck) @@ -494,11 +529,6 @@ _GLOBAL(do_stab_bolted_Pseries) mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) -_GLOBAL(do_slb_bolted_Pseries) - mtcrf 0x80,r12 - mfspr r12,SPRG2 - EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_slb_bolted) - /* Space for the naca. Architected to be located at real address * NACA_PHYS_ADDR. Various tools rely on this location being fixed. 
@@ -587,27 +617,25 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl DataAccessSLB_Iseries DataAccessSLB_Iseries: mtspr SPRG1,r13 /* save r13 */ - mtspr SPRG2,r12 - mfspr r13,DAR - mfcr r12 - srdi r13,r13,60 - cmpdi r13,0xc - beq .do_slb_bolted_Iseries - mtcrf 0x80,r12 - mfspr r12,SPRG2 - EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) - EXCEPTION_PROLOG_ISERIES_2 - b DataAccessSLB_common - -.do_slb_bolted_Iseries: - mtcrf 0x80,r12 - mfspr r12,SPRG2 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) - EXCEPTION_PROLOG_ISERIES_2 - b .do_slb_bolted + std r3,PACASLBR3(r13) + ld r11,PACALPPACA+LPPACASRR0(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mfspr r3,DAR + b .do_slb_miss STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN) - STD_EXCEPTION_ISERIES(0x480, InstructionAccessSLB, PACA_EXGEN) + + .globl InstructionAccessSLB_Iseries +InstructionAccessSLB_Iseries: + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACASLBR3(r13) + ld r11,PACALPPACA+LPPACASRR0(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mr r3,r11 + b .do_slb_miss + MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt) STD_EXCEPTION_ISERIES(0x600, Alignment, PACA_EXGEN) STD_EXCEPTION_ISERIES(0x700, ProgramCheck, PACA_EXGEN) @@ -865,21 +893,6 @@ DataAccess_common: b .do_hash_page /* Try to handle as hpte fault */ .align 7 - .globl DataAccessSLB_common -DataAccessSLB_common: - mfspr r10,DAR - std r10,PACA_EXGEN+EX_DAR(r13) - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) - ld r3,PACA_EXGEN+EX_DAR(r13) - std r3,_DAR(r1) - bl .slb_allocate - cmpdi r3,0 /* Check return code */ - beq fast_exception_return /* Return if we succeeded */ - li r5,0 - std r5,_DSISR(r1) - b .handle_page_fault - - .align 7 .globl InstructionAccess_common InstructionAccess_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) @@ -889,21 +902,6 @@ InstructionAccess_common: b .do_hash_page /* Try to handle as hpte fault */ .align 7 - .globl InstructionAccessSLB_common -InstructionAccessSLB_common: - EXCEPTION_PROLOG_COMMON(0x480, PACA_EXGEN) - ld r3,_NIP(r1) /* SRR0 = NIA */ - bl .slb_allocate - or. r3,r3,r3 /* Check return code */ - beq+ fast_exception_return /* Return if we succeeded */ - - ld r4,_NIP(r1) - li r5,0 - std r4,_DAR(r1) - std r5,_DSISR(r1) - b .handle_page_fault - - .align 7 .globl HardwareInterrupt_common .globl HardwareInterrupt_entry HardwareInterrupt_common: @@ -1152,130 +1150,37 @@ _GLOBAL(do_stab_bolted) /* * r13 points to the PACA, r9 contains the saved CR, * r11 and r12 contain the saved SRR0 and SRR1. + * r3 has the faulting address * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 * We assume we aren't going to take any exceptions during this procedure. */ -/* XXX note fix masking in get_kernel_vsid to match */ -_GLOBAL(do_slb_bolted) +_GLOBAL(do_slb_miss) + mflr r10 + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - /* - * We take the next entry, round robin. Previously we tried - * to find a free slot first but that took too long. Unfortunately - * we dont have any LRU information to help us choose a slot. - */ - - /* r13 = paca */ -1: ld r10,PACASTABRR(r13) - addi r9,r10,1 - cmpdi r9,SLB_NUM_ENTRIES - blt+ 2f - li r9,2 /* dont touch slot 0 or 1 */ -2: std r9,PACASTABRR(r13) - - /* r13 = paca, r10 = entry */ - - /* - * Never cast out the segment for our kernel stack. 
Since we - * dont invalidate the ERAT we could have a valid translation - * for the kernel stack during the first part of exception exit - * which gets invalidated due to a tlbie from another cpu at a - * non recoverable point (after setting srr0/1) - Anton - */ - slbmfee r9,r10 - srdi r9,r9,27 - /* - * Use paca->ksave as the value of the kernel stack pointer, - * because this is valid at all times. - * The >> 27 (rather than >> 28) is so that the LSB is the - * valid bit - this way we check valid and ESID in one compare. - * In order to completely close the tiny race in the context - * switch (between updating r1 and updating paca->ksave), - * we check against both r1 and paca->ksave. - */ - srdi r11,r1,27 - ori r11,r11,1 - cmpd r11,r9 - beq- 1b - ld r11,PACAKSAVE(r13) - srdi r11,r11,27 - ori r11,r11,1 - cmpd r11,r9 - beq- 1b - - /* r13 = paca, r10 = entry */ - - /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ - mfspr r9,DAR - rldicl r11,r9,36,51 - sldi r11,r11,15 - srdi r9,r9,60 - or r11,r11,r9 - - /* VSID_RANDOMIZER */ - li r9,9 - sldi r9,r9,32 - oris r9,r9,58231 - ori r9,r9,39831 - - /* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */ - mulld r11,r11,r9 - clrldi r11,r11,28 - - /* r13 = paca, r10 = entry, r11 = vsid */ - - /* Put together slb word1 */ - sldi r11,r11,12 - -BEGIN_FTR_SECTION - /* set kp and c bits */ - ori r11,r11,0x480 -END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) -BEGIN_FTR_SECTION - /* set kp, l and c bits */ - ori r11,r11,0x580 -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) - - /* r13 = paca, r10 = entry, r11 = slb word1 */ - - /* Put together slb word0 */ - mfspr r9,DAR - clrrdi r9,r9,28 /* get the new esid */ - oris r9,r9,0x800 /* set valid bit */ - rldimi r9,r10,0,52 /* insert entry */ - - /* r13 = paca, r9 = slb word0, r11 = slb word1 */ - - /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing . - */ - slbmte r11,r9 + bl .slb_allocate /* handle it */ /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACASLBR3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ + mtlr r10 + andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ beq- unrecov_slb - /* - * Until everyone updates binutils hardwire the POWER4 optimised - * single field mtcrf - */ -#if 0 - .machine push - .machine "power4" +.machine push +.machine "power4" mtcrf 0x80,r9 - .machine pop -#else - .long 0x7d380120 -#endif - - mfmsr r10 - clrrdi r10,r10,2 - mtmsrd r10,1 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop mtspr SRR0,r11 mtspr SRR1,r12 diff -puN arch/ppc64/kernel/pacaData.c~ppc64-improve-slb-reload arch/ppc64/kernel/pacaData.c --- 25/arch/ppc64/kernel/pacaData.c~ppc64-improve-slb-reload 2004-08-01 23:00:03.011075184 -0700 +++ 25-akpm/arch/ppc64/kernel/pacaData.c 2004-08-01 23:00:03.032071992 -0700 @@ -57,7 +57,6 @@ char emergency_stack[PAGE_SIZE * NR_CPUS .stab_addr = (asrv), /* Virt pointer to segment table */ \ .emergency_sp = &emergency_stack[((number)+1) * PAGE_SIZE], \ .cpu_start = (start), /* Processor start */ \ - .stab_next_rr = 1, \ .lppaca = { \ .xDesc = 0xd397d781, /* "LpPa" */ \ .xSize = sizeof(struct ItLpPaca), \ diff -puN arch/ppc64/kernel/smp.c~ppc64-improve-slb-reload arch/ppc64/kernel/smp.c --- 25/arch/ppc64/kernel/smp.c~ppc64-improve-slb-reload 2004-08-01 23:00:03.013074880 -0700 +++ 25-akpm/arch/ppc64/kernel/smp.c 2004-08-01 23:00:03.033071840 -0700 @@ -389,8 +389,6 @@ static inline int __devinit smp_startup_ /* Fixup atomic count: it exited inside IRQ handler. */ paca[lcpu].__current->thread_info->preempt_count = 0; - /* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */ - paca[lcpu].stab_next_rr = 0; /* At boot this is done in prom.c. */ paca[lcpu].hw_cpu_id = pcpu; diff -puN arch/ppc64/kernel/stab.c~ppc64-improve-slb-reload arch/ppc64/kernel/stab.c --- 25/arch/ppc64/kernel/stab.c~ppc64-improve-slb-reload 2004-08-01 23:00:03.014074728 -0700 +++ 25-akpm/arch/ppc64/kernel/stab.c 2004-08-01 23:00:03.035071536 -0700 @@ -20,26 +20,10 @@ #include #include -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid); -static void make_slbe(unsigned long esid, unsigned long vsid, int large, - int kernel_segment); +static int make_ste(unsigned long stab, unsigned long esid, + unsigned long vsid); -static inline void slb_add_bolted(void) -{ -#ifndef CONFIG_PPC_ISERIES - unsigned long esid = GET_ESID(VMALLOCBASE); - unsigned long vsid = get_kernel_vsid(VMALLOCBASE); - - WARN_ON(!irqs_disabled()); - - /* - * Bolt in the first vmalloc segment. Since modules end - * up there it gets hit very heavily. 
- */ - get_paca()->stab_next_rr = 1; - make_slbe(esid, vsid, 0, 1); -#endif -} +void slb_initialize(void); /* * Build an entry for the base kernel segment and put it into @@ -48,32 +32,13 @@ static inline void slb_add_bolted(void) */ void stab_initialize(unsigned long stab) { - unsigned long esid, vsid; - int seg0_largepages = 0; - - esid = GET_ESID(KERNELBASE); - vsid = get_kernel_vsid(esid << SID_SHIFT); - - if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) - seg0_largepages = 1; + unsigned long vsid = get_kernel_vsid(KERNELBASE); if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - /* Invalidate the entire SLB & all the ERATS */ -#ifdef CONFIG_PPC_ISERIES - asm volatile("isync; slbia; isync":::"memory"); -#else - asm volatile("isync":::"memory"); - asm volatile("slbmte %0,%0"::"r" (0) : "memory"); - asm volatile("isync; slbia; isync":::"memory"); - get_paca()->stab_next_rr = 0; - make_slbe(esid, vsid, seg0_largepages, 1); - asm volatile("isync":::"memory"); -#endif - - slb_add_bolted(); + slb_initialize(); } else { asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, esid, vsid); + make_ste(stab, GET_ESID(KERNELBASE), vsid); /* Order update */ asm volatile("sync":::"memory"); @@ -129,7 +94,7 @@ static int make_ste(unsigned long stab, * Could not find empty entry, pick one with a round robin selection. * Search all entries in the two groups. */ - castout_entry = get_paca()->stab_next_rr; + castout_entry = get_paca()->stab_rr; for (i = 0; i < 16; i++) { if (castout_entry < 8) { global_entry = (esid & 0x1f) << 3; @@ -148,7 +113,7 @@ static int make_ste(unsigned long stab, castout_entry = (castout_entry + 1) & 0xf; } - get_paca()->stab_next_rr = (castout_entry + 1) & 0xf; + get_paca()->stab_rr = (castout_entry + 1) & 0xf; /* Modify the old entry to the new value. */ @@ -314,229 +279,3 @@ void flush_stab(struct task_struct *tsk, preload_stab(tsk, mm); } - -/* - * SLB stuff - */ - -/* - * Create a segment buffer entry for the given esid/vsid pair. - * - * NOTE: A context syncronising instruction is required before and after - * this, in the common case we use exception entry and rfid. - */ -static void make_slbe(unsigned long esid, unsigned long vsid, int large, - int kernel_segment) -{ - unsigned long entry, castout_entry; - union { - unsigned long word0; - slb_dword0 data; - } esid_data; - union { - unsigned long word0; - slb_dword1 data; - } vsid_data; - struct paca_struct *lpaca = get_paca(); - - /* - * We take the next entry, round robin. Previously we tried - * to find a free slot first but that took too long. Unfortunately - * we dont have any LRU information to help us choose a slot. - */ - - /* - * Never cast out the segment for our kernel stack. Since we - * dont invalidate the ERAT we could have a valid translation - * for the kernel stack during the first part of exception exit - * which gets invalidated due to a tlbie from another cpu at a - * non recoverable point (after setting srr0/1) - Anton - * - * paca Ksave is always valid (even when on the interrupt stack) - * so we use that. - */ - castout_entry = lpaca->stab_next_rr; - do { - entry = castout_entry; - castout_entry++; - /* - * We bolt in the first kernel segment and the first - * vmalloc segment. - */ - if (castout_entry >= SLB_NUM_ENTRIES) - castout_entry = 2; - asm volatile("slbmfee %0,%1" : "=r" (esid_data) : "r" (entry)); - } while (esid_data.data.v && - esid_data.data.esid == GET_ESID(lpaca->kstack)); - - lpaca->stab_next_rr = castout_entry; - - /* slbie not needed as the previous mapping is still valid. 
*/ - - /* - * Write the new SLB entry. - */ - vsid_data.word0 = 0; - vsid_data.data.vsid = vsid; - vsid_data.data.kp = 1; - if (large) - vsid_data.data.l = 1; - if (kernel_segment) - vsid_data.data.c = 1; - else - vsid_data.data.ks = 1; - - esid_data.word0 = 0; - esid_data.data.esid = esid; - esid_data.data.v = 1; - esid_data.data.index = entry; - - /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - */ - asm volatile("slbmte %0,%1" : : "r" (vsid_data), "r" (esid_data)); -} - -static inline void __slb_allocate(unsigned long esid, unsigned long vsid, - mm_context_t context) -{ - int large = 0; - int region_id = REGION_ID(esid << SID_SHIFT); - unsigned long offset; - - if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) { - if (region_id == KERNEL_REGION_ID) - large = 1; - else if (region_id == USER_REGION_ID) - large = in_hugepage_area(context, esid << SID_SHIFT); - } - - make_slbe(esid, vsid, large, region_id != USER_REGION_ID); - - if (region_id != USER_REGION_ID) - return; - - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = esid; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; -} - -/* - * Allocate a segment table entry for the given ea. - */ -int slb_allocate(unsigned long ea) -{ - unsigned long vsid, esid; - mm_context_t context; - - /* Check for invalid effective addresses. */ - if (unlikely(!IS_VALID_EA(ea))) - return 1; - - /* Kernel or user address? */ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { - context = KERNEL_CONTEXT(ea); - vsid = get_kernel_vsid(ea); - } else { - if (unlikely(!current->mm)) - return 1; - - context = current->mm->context; - vsid = get_vsid(context.id, ea); - } - - esid = GET_ESID(ea); -#ifndef CONFIG_PPC_ISERIES - BUG_ON((esid << SID_SHIFT) == VMALLOCBASE); -#endif - __slb_allocate(esid, vsid, context); - - return 0; -} - -/* - * preload some userspace segments into the SLB. - */ -static void preload_slb(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - unsigned long pc_esid = GET_ESID(pc); - unsigned long stack_esid = GET_ESID(stack); - unsigned long unmapped_base_esid; - unsigned long vsid; - - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - unmapped_base_esid = GET_ESID(unmapped_base); - - if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, pc); - __slb_allocate(pc_esid, vsid, mm->context); - - if (pc_esid == stack_esid) - return; - - if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, stack); - __slb_allocate(stack_esid, vsid, mm->context); - - if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) - return; - - if (!IS_VALID_EA(unmapped_base) || - (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, unmapped_base); - __slb_allocate(unmapped_base_esid, vsid, mm->context); -} - -/* Flush all user entries from the segment table of the current processor. 
*/ -void flush_slb(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long offset = __get_cpu_var(stab_cache_ptr); - union { - unsigned long word0; - slb_dword0 data; - } esid_data; - - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - asm volatile("isync" : : : "memory"); - for (i = 0; i < offset; i++) { - esid_data.word0 = 0; - esid_data.data.esid = __get_cpu_var(stab_cache[i]); - BUG_ON(esid_data.data.esid == GET_ESID(VMALLOCBASE)); - asm volatile("slbie %0" : : "r" (esid_data)); - } - asm volatile("isync" : : : "memory"); - } else { - asm volatile("isync; slbia; isync" : : : "memory"); - slb_add_bolted(); - } - - /* Workaround POWER5 < DD2.1 issue */ - if (offset == 1 || offset > NR_STAB_CACHE_ENTRIES) { - /* - * flush segment in EEH region, we dont normally access - * addresses in this region. - */ - esid_data.word0 = 0; - esid_data.data.esid = EEH_REGION_ID; - asm volatile("slbie %0" : : "r" (esid_data)); - } - - __get_cpu_var(stab_cache_ptr) = 0; - - preload_slb(tsk, mm); -} diff -puN arch/ppc64/mm/fault.c~ppc64-improve-slb-reload arch/ppc64/mm/fault.c --- 25/arch/ppc64/mm/fault.c~ppc64-improve-slb-reload 2004-08-01 23:00:03.016074424 -0700 +++ 25-akpm/arch/ppc64/mm/fault.c 2004-08-01 23:00:03.036071384 -0700 @@ -93,13 +93,15 @@ int do_page_fault(struct pt_regs *regs, unsigned long is_write = error_code & 0x02000000; unsigned long trap = TRAP(regs); - if (trap == 0x300 || trap == 0x380) { + BUG_ON((trap == 0x380) || (trap == 0x480)); + + if (trap == 0x300) { if (debugger_fault_handler(regs)) return 0; } /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (trap == 0x380 || address >= TASK_SIZE)) + if (!user_mode(regs) && (address >= TASK_SIZE)) return SIGSEGV; if (error_code & 0x00400000) { diff -puN arch/ppc64/mm/Makefile~ppc64-improve-slb-reload arch/ppc64/mm/Makefile --- 25/arch/ppc64/mm/Makefile~ppc64-improve-slb-reload 2004-08-01 23:00:03.017074272 -0700 +++ 25-akpm/arch/ppc64/mm/Makefile 2004-08-01 23:00:03.036071384 -0700 @@ -4,6 +4,6 @@ EXTRA_CFLAGS += -mno-minimal-toc -obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o +obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o slb_low.o slb.o obj-$(CONFIG_DISCONTIGMEM) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff -puN /dev/null arch/ppc64/mm/slb.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/slb.c 2004-08-01 23:00:03.037071232 -0700 @@ -0,0 +1,136 @@ +/* + * PowerPC64 SLB support. + * + * Copyright (C) 2004 David Gibson , IBM + * Based on earlier code writteh by: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +extern void slb_allocate(unsigned long ea); + +static inline void create_slbe(unsigned long ea, unsigned long vsid, + unsigned long flags, unsigned long entry) +{ + ea = (ea & ESID_MASK) | SLB_ESID_V | entry; + vsid = (vsid << SLB_VSID_SHIFT) | flags; + asm volatile("slbmte %0,%1" : + : "r" (vsid), "r" (ea) + : "memory" ); +} + +static void slb_add_bolted(void) +{ +#ifndef CONFIG_PPC_ISERIES + WARN_ON(!irqs_disabled()); + + /* If you change this make sure you change SLB_NUM_BOLTED + * appropriately too */ + + /* Slot 1 - first VMALLOC segment + * Since modules end up there it gets hit very heavily. + */ + create_slbe(VMALLOCBASE, get_kernel_vsid(VMALLOCBASE), + SLB_VSID_KERNEL, 1); + + asm volatile("isync":::"memory"); +#endif +} + +/* Flush all user entries from the segment table of the current processor. */ +void switch_slb(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long esid_data; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + if (offset <= SLB_CACHE_ENTRIES) { + int i; + asm volatile("isync" : : : "memory"); + for (i = 0; i < offset; i++) { + esid_data = (unsigned long)get_paca()->slb_cache[i] + << SID_SHIFT; + asm volatile("slbie %0" : : "r" (esid_data)); + } + asm volatile("isync" : : : "memory"); + } else { + asm volatile("isync; slbia; isync" : : : "memory"); + slb_add_bolted(); + } + + /* Workaround POWER5 < DD2.1 issue */ + if (offset == 1 || offset > SLB_CACHE_ENTRIES) { + /* flush segment in EEH region, we shouldn't ever + * access addresses in this region. */ + asm volatile("slbie %0" : : "r"(EEHREGIONBASE)); + } + + get_paca()->slb_cache_ptr = 0; + get_paca()->context = mm->context; + + /* + * preload some userspace segments into the SLB. 
+ */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + if (pc >= KERNELBASE) + return; + slb_allocate(pc); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + if (stack >= KERNELBASE) + return; + slb_allocate(stack); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + if (unmapped_base >= KERNELBASE) + return; + slb_allocate(unmapped_base); +} + +void slb_initialize(void) +{ +#ifdef CONFIG_PPC_ISERIES + asm volatile("isync; slbia; isync":::"memory"); +#else + unsigned long flags = SLB_VSID_KERNEL; + + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) + flags |= SLB_VSID_L; + + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + asm volatile("isync; slbia; isync":::"memory"); + create_slbe(KERNELBASE, get_kernel_vsid(KERNELBASE), + flags, 0); + +#endif + slb_add_bolted(); + get_paca()->stab_rr = SLB_NUM_BOLTED; +} diff -puN /dev/null arch/ppc64/mm/slb_low.S --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/slb_low.S 2004-08-01 23:00:03.038071080 -0700 @@ -0,0 +1,168 @@ +/* + * arch/ppc64/mm/slb_low.S + * + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson , IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * First find a slot, round robin. Previously we tried to find + * a free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ + srdi r9,r1,27 + ori r9,r9,1 /* mangle SP for later compare */ + + ld r10,PACASTABRR(r13) +3: + addi r10,r10,1 + /* use a cpu feature mask if we ever change our slb size */ + cmpldi r10,SLB_NUM_ENTRIES + + blt+ 4f + li r10,SLB_NUM_BOLTED +4: + slbmfee r11,r10 + /* Don't throw out the segment for our kernel stack. Since we + * dont invalidate the ERAT we could have a valid translation + * for the kernel stack during the first part of exception + * exit which gets invalidated due to a tlbie from another cpu + * at a non recoverable point (after setting srr0/1) - Anton + * + * The >> 27 (rather than >> 28) is so that the LSB is the + * valid bit - this way we check valid and ESID in one compare. + */ + srdi r11,r11,27 + cmpd r11,r9 + beq- 3b + + std r10,PACASTABRR(r13) + + /* r3 = faulting address, r10 = entry */ + + srdi r9,r3,60 /* get region */ + srdi r3,r3,28 /* get esid */ + cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + + /* r9 = region, r3 = esid, cr7 = <>KERNELBASE */ + + rldicr. 
r11,r3,32,16 + bne- 8f /* invalid ea bits set */ + addi r11,r9,-1 + cmpldi r11,0xb + blt- 8f /* invalid region */ + + /* r9 = region, r3 = esid, r10 = entry, cr7 = <>KERNELBASE */ + + blt cr7,0f /* user or kernel? */ + + /* kernel address */ + li r11,SLB_VSID_KERNEL +BEGIN_FTR_SECTION + bne cr7,9f + li r11,(SLB_VSID_KERNEL|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) + b 9f + +0: /* user address */ + li r11,SLB_VSID_USER +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + /* check against the hugepage ranges */ + cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) + bge 6f /* >= TASK_HPAGE_END */ + cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) + bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ + cmpldi r3,16 + bge 6f /* 4GB..TASK_HPAGE_BASE */ + + lhz r9,PACAHTLBSEGS(r13) + srd r9,r9,r3 + andi. r9,r9,1 + beq 6f + +5: /* this is a hugepage user address */ + li r11,(SLB_VSID_USER|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + +6: ld r9,PACACONTEXTID(r13) + +9: /* r9 = "context", r3 = esid, r11 = flags, r10 = entry */ + + rldimi r9,r3,15,0 /* r9= VSID ordinal */ + +7: rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ + oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ + + /* r9 = ordinal, r3 = esid, r11 = flags, r10 = esid_data */ + + li r3,VSID_RANDOMIZER@higher + sldi r3,r3,32 + oris r3,r3,VSID_RANDOMIZER@h + ori r3,r3,VSID_RANDOMIZER@l + + mulld r9,r3,r9 /* r9 = ordinal * VSID_RANDOMIZER */ + clrldi r9,r9,28 /* r9 &= VSID_MASK */ + sldi r9,r9,SLB_VSID_SHIFT /* r9 <<= SLB_VSID_SHIFT */ + or r9,r9,r11 /* r9 |= flags */ + + /* r9 = vsid_data, r10 = esid_data, cr7 = <>KERNELBASE */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r9,r10 + + bgelr cr7 /* we're done for kernel addresses */ + + /* Update the slb cache */ + lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r3,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ + rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ + add r11,r11,r13 /* r11 = (u16 *)paca + offset */ + sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r3,r3,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r3,SLB_CACHE_ENTRIES+1 +2: + sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + blr + +8: /* invalid EA */ + li r9,0 /* 0 VSID ordinal -> BAD_VSID */ + li r11,SLB_VSID_USER /* flags don't much matter */ + b 7b diff -puN include/asm-ppc64/mmu_context.h~ppc64-improve-slb-reload include/asm-ppc64/mmu_context.h --- 25/include/asm-ppc64/mmu_context.h~ppc64-improve-slb-reload 2004-08-01 23:00:03.019073968 -0700 +++ 25-akpm/include/asm-ppc64/mmu_context.h 2004-08-01 23:00:03.039070928 -0700 @@ -136,7 +136,7 @@ destroy_context(struct mm_struct *mm) } extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm); -extern void flush_slb(struct task_struct *tsk, struct mm_struct *mm); +extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); /* * switch_mm is the entry point called from the architecture independent @@ -161,7 +161,7 @@ static inline void switch_mm(struct mm_s return; if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) - flush_slb(tsk, next); + switch_slb(tsk, next); else flush_stab(tsk, next); } @@ -181,10 +181,6 @@ static inline void activate_mm(struct mm local_irq_restore(flags); } -#define VSID_RANDOMIZER 42470972311UL -#define VSID_MASK 0xfffffffffUL - - /* This is only valid 
for kernel (including vmalloc, imalloc and bolted) EA's */ static inline unsigned long diff -puN include/asm-ppc64/mmu.h~ppc64-improve-slb-reload include/asm-ppc64/mmu.h --- 25/include/asm-ppc64/mmu.h~ppc64-improve-slb-reload 2004-08-01 23:00:03.020073816 -0700 +++ 25-akpm/include/asm-ppc64/mmu.h 2004-08-01 23:00:03.039070928 -0700 @@ -37,12 +37,6 @@ typedef struct { mm_context_t ctx = { .id = REGION_ID(ea), KERNEL_LOW_HPAGES}; \ ctx; }) -/* - * Hardware Segment Lookaside Buffer Entry - * This structure has been padded out to two 64b doublewords (actual SLBE's are - * 94 bits). This padding facilites use by the segment management - * instructions. - */ typedef struct { unsigned long esid: 36; /* Effective segment ID */ unsigned long resv0:20; /* Reserved */ @@ -71,35 +65,6 @@ typedef struct _STE { } dw1; } STE; -typedef struct { - unsigned long esid: 36; /* Effective segment ID */ - unsigned long v: 1; /* Entry valid (v=1) or invalid */ - unsigned long null1:15; /* padding to a 64b boundary */ - unsigned long index:12; /* Index to select SLB entry. Used by slbmte */ -} slb_dword0; - -typedef struct { - unsigned long vsid: 52; /* Virtual segment ID */ - unsigned long ks: 1; /* Supervisor (privileged) state storage key */ - unsigned long kp: 1; /* Problem state storage key */ - unsigned long n: 1; /* No-execute if n=1 */ - unsigned long l: 1; /* Virt pages are large (l=1) or 4KB (l=0) */ - unsigned long c: 1; /* Class */ - unsigned long resv0: 7; /* Padding to a 64b boundary */ -} slb_dword1; - -typedef struct { - union { - unsigned long dword0; - slb_dword0 dw0; - } dw0; - - union { - unsigned long dword1; - slb_dword1 dw1; - } dw1; -} SLBE; - /* Hardware Page Table Entry */ #define HPTES_PER_GROUP 8 @@ -259,6 +224,30 @@ extern void htab_finish_init(void); #define STAB0_PHYS_ADDR (STAB0_PAGE<> SID_SHIFT) & SID_MASK) #ifdef CONFIG_HUGETLB_PAGE @@ -37,8 +38,8 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) /* For 64-bit processes the hugepage range is 1T-1.5T */ -#define TASK_HPAGE_BASE (0x0000010000000000UL) -#define TASK_HPAGE_END (0x0000018000000000UL) +#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) +#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - (1U << GET_ESID(addr))) & 0xffff) _
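
For readers following the new slb_low.S, the VSID and SLB-entry
computation is easier to see in C.  The sketch below is illustrative
only and is not part of the patch: the helper names are made up, the
"ordinal" formed from the ESID and context id is shown in simplified
form rather than the exact rldimi bit layout, and the constants are the
values the patch works with (VSID_RANDOMIZER, a 36-bit VSID mask,
SLB_VSID_SHIFT of 12, and the SLB valid bit derived from the old
slb_dword0 layout).

	#define VSID_RANDOMIZER	42470972311UL		/* multiplier used on the ordinal */
	#define VSID_MASK	0xfffffffffUL		/* VSIDs are 36 bits */
	#define SID_SHIFT	28			/* 256MB segments */
	#define SLB_VSID_SHIFT	12			/* VSID position in SLB dword 1 */
	#define SLB_ESID_V	0x0000000008000000UL	/* valid bit in SLB dword 0 */

	/* Hypothetical helper: scramble (context, ea) into a VSID. */
	static unsigned long sketch_vsid(unsigned long context, unsigned long ea)
	{
		/* simplified: ESID above the 15-bit context/region value */
		unsigned long ordinal = ((ea >> SID_SHIFT) << 15) | context;

		return (ordinal * VSID_RANDOMIZER) & VSID_MASK;
	}

	/* Hypothetical helper: build the two doublewords slbmte expects. */
	static void sketch_slbe(unsigned long ea, unsigned long context,
				unsigned long flags, unsigned long entry,
				unsigned long *esid_data, unsigned long *vsid_data)
	{
		/* dword 0: ESID | valid bit | slot index for slbmte */
		*esid_data = (ea & ~((1UL << SID_SHIFT) - 1)) | SLB_ESID_V | entry;
		/* dword 1: VSID | protection/size flags (SLB_VSID_*) */
		*vsid_data = (sketch_vsid(context, ea) << SLB_VSID_SHIFT) | flags;
	}

A bogus address follows the same path with an ordinal of 0, so the entry
maps VSID 0 and the subsequent access takes an ordinary (fatal) page
fault, as described in the changelog, instead of special-casing the
error inside the SLB miss handler.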