author		David S. Miller <davem@sunset.davemloft.net>	2005-03-29 04:41:34 -0800
committer	David S. Miller <davem@sunset.davemloft.net>	2005-03-29 04:41:34 -0800
commit		aab9356ff0e79fe1c0188412c9f3de3d8584103a
tree		b188e8c0e1c0f3d4eb5de44d26d2c23d0fe77f3d
parent		a592a0d140435e410625f05c9f20fbfb8c8f7f6a
parent		27649cce2886c85b367d2e2c98144c9a78526e66
download	history-aab9356ff0e79fe1c0188412c9f3de3d8584103a.tar.gz
Merge sunset.davemloft.net:/home/davem/src/BK/sparcwork-2.6
into sunset.davemloft.net:/home/davem/src/BK/sparc-2.6
55 files changed, 1448 insertions, 1619 deletions
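The interesting part of this merge is the configurable kernel page size: the dtlb_backend.S rewrite below derives all of its PMD/PGDIR extraction constants from PAGE_SHIFT alone, and the long comment in that hunk walks through the bit positions A through F. As a quick cross-check of that arithmetic, here is a small stand-alone C sketch; it is not part of the commit, the names simply mirror the asm macros:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirror of the dtlb_backend.S constants, parameterized on
	 * page_shift.  Stand-alone check only; not kernel code. */
	static void show(unsigned page_shift)
	{
		unsigned A = 3 + (page_shift - 3) - 1;
		unsigned B = A + 1;		/* always == page_shift */
		unsigned C = B + (page_shift - 2) - 1;
		unsigned D = C + 1;
		unsigned E = D + (page_shift - 2) - 1;
		unsigned F = E + 1;
		unsigned pmd_sl = 64 - D, pmd_sr = 64 - (D - B) - 2;
		unsigned pgd_sl = 64 - F, pgd_sr = 64 - (F - D) - 2;

		/* A VPTE access address with every index bit set. */
		uint64_t x = ~0ULL;
		uint64_t pmd_off = ((x << pmd_sl) >> pmd_sr) & ~0x3ULL;
		uint64_t pgd_off = ((x << pgd_sl) >> pgd_sr) & ~0x3ULL;

		printf("PAGE_SHIFT=%u: A=%u B=%u C=%u D=%u E=%u F=%u\n",
		       page_shift, A, B, C, D, E, F);
		printf("  max PMD offset=%#llx  max PGDIR offset=%#llx\n",
		       (unsigned long long)pmd_off,
		       (unsigned long long)pgd_off);
	}

	int main(void)
	{
		show(13);	/* 8K pages:  expect A=12 ... F=35 */
		show(16);	/* 64K pages: expect A=15 ... F=44 */
		return 0;
	}

For PAGE_SHIFT 13 and 16 this reproduces the A through F values quoted in the comment, and the extracted offsets come out pre-scaled by 4 (the PMD/PGDIR entry size), which is why the asm only needs the final andn with LOW_MASK_BITS to finish the job.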
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index b31687f3e7214..46a2436c9600c 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -16,6 +16,33 @@ config TIME_INTERPOLATION
 	bool
 	default y
 
+choice
+	prompt "Kernel page size"
+	default SPARC64_PAGE_SIZE_8KB
+
+config SPARC64_PAGE_SIZE_8KB
+	bool "8KB"
+	help
+	  This lets you select the page size of the kernel.
+
+	  8KB and 64KB work quite well, since Sparc ELF sections
+	  provide for up to 64KB alignment.
+
+	  Therefore, 512KB and 4MB are for expert hackers only.
+
+	  If you don't know what to do, choose 8KB.
+
+config SPARC64_PAGE_SIZE_64KB
+	bool "64KB"
+
+config SPARC64_PAGE_SIZE_512KB
+	bool "512KB"
+
+config SPARC64_PAGE_SIZE_4MB
+	bool "4MB"
+
+endchoice
+
 source "init/Kconfig"
 
 config SYSVIPC_COMPAT
@@ -198,9 +225,11 @@ config HUGETLB_PAGE_SIZE_4MB
 	bool "4MB"
 
 config HUGETLB_PAGE_SIZE_512K
+	depends on !SPARC64_PAGE_SIZE_4MB
 	bool "512K"
 
 config HUGETLB_PAGE_SIZE_64K
+	depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512K
 	bool "64K"
 
 endchoice
diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
index e6bc4a26aeb9b..b73a3c8587704 100644
--- a/arch/sparc64/kernel/dtlb_backend.S
+++ b/arch/sparc64/kernel/dtlb_backend.S
@@ -7,60 +7,143 @@
  */
 
 #include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 #if PAGE_SHIFT == 13
-#define FILL_VALID_SZ_BITS1(r1) \
-	 sllx	%g2, 62, r1
-#define FILL_VALID_SZ_BITS2(r1)
-#define FILL_VALID_SZ_BITS_NOP nop
+#define SZ_BITS		_PAGE_SZ8K
 #elif PAGE_SHIFT == 16
-#define FILL_VALID_SZ_BITS1(r1) \
-	or	%g0, 5, r1
-#define FILL_VALID_SZ_BITS2(r1) \
-	sllx	r1, 61, r1
-#define FILL_VALID_SZ_BITS_NOP
-#else
-#error unsupported PAGE_SIZE
-#endif /* PAGE_SHIFT */
+#define SZ_BITS		_PAGE_SZ64K
+#elif PAGE_SHIFT == 19
+#define SZ_BITS		_PAGE_SZ512K
+#elif PAGE_SHIFT == 22
+#define SZ_BITS		_PAGE_SZ4M
+#endif
+
+#define VALID_SZ_BITS	(_PAGE_VALID | SZ_BITS)
 
 #define VPTE_BITS		(_PAGE_CP | _PAGE_CV | _PAGE_P )
 #define VPTE_SHIFT		(PAGE_SHIFT - 3)
-#define TLB_PMD_SHIFT		(PAGE_SHIFT - 3 + 3)
-#define TLB_PGD_SHIFT		(PMD_BITS + PAGE_SHIFT - 3 + 3)
-#define TLB_PMD_MASK		(((1 << PMD_BITS) - 1) << 1)
-#define TLB_PGD_MASK		(((1 << (VA_BITS - PAGE_SHIFT - (PAGE_SHIFT - 3) - PMD_BITS)) - 1) << 2)
 
 /* Ways we can get here:
  *
  * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
  * 2) Nucleus loads and stores to/from user/kernel window save areas.
  * 3) VPTE misses from dtlb_base and itlb_base.
+ *
+ * We need to extract out the PMD and PGDIR indexes from the
+ * linear virtual page table access address.  The PTE index
+ * is at the bottom, but we are not concerned with it.  Bits
+ * 0 to 2 are clear since each PTE is 8 bytes in size.  Each
+ * PMD and PGDIR entry is 4 bytes in size.  Thus, this
+ * address looks something like:
+ *
+ * |---------------------------------------------------------------|
+ * |  ...   |    PGDIR index    |    PMD index    | PTE index  |   |
+ * |---------------------------------------------------------------|
+ *   63   F   E                D  C             B  A         3 2  0  <- bit nr
+ *
+ * The variable bits above are defined as:
+ *	A --> 3 + (PAGE_SHIFT - log2(8))
+ *	  --> 3 + (PAGE_SHIFT - 3) - 1
+ *	(ie. this is "bit 3" + PAGE_SHIFT - log2(PTE entry size) - 1)
+ *	B --> A + 1
+ *	C --> B + (PAGE_SHIFT - log2(4))
+ *	  --> B + (PAGE_SHIFT - 2) - 1
+ *	(ie. this is "bit B" + PAGE_SHIFT - log2(PMD entry size) - 1)
+ *	D --> C + 1
+ *	E --> D + (PAGE_SHIFT - log2(4))
+ *	  --> D + (PAGE_SHIFT - 2) - 1
+ *	(ie. this is "bit D" + PAGE_SHIFT - log2(PGDIR entry size) - 1)
+ *	F --> E + 1
+ *
+ * (Note how "B" always evaluates to PAGE_SHIFT, all the other
+ * constants cancel out.)
+ *
+ * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
+ *	A --> 12
+ *	B --> 13
+ *	C --> 23
+ *	D --> 24
+ *	E --> 34
+ *	F --> 35
+ *
+ * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
+ *	A --> 15
+ *	B --> 16
+ *	C --> 29
+ *	D --> 30
+ *	E --> 43
+ *	F --> 44
+ *
+ * Because bits both above and below each PGDIR and PMD index need to
+ * be masked out, and the index can be as long as 14 bits (when using a
+ * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
+ * to extract each index out.
+ *
+ * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
+ * we try to avoid using them for the entire operation.  We could set up
+ * a mask anywhere from bit 31 down to bit 10 using the sethi instruction.
+ *
+ * We need a mask covering bits B --> C and one covering D --> E.
+ * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
+ * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
+ * The second in each set cannot be loaded with a single sethi
+ * instruction, because the upper bits are past bit 32.  We would
+ * need to use a sethi + a shift.
+ *
+ * For the time being, we use 2 shifts and a simple "and" mask.
+ * We shift left to clear the bits above the index, we shift down
+ * to clear the bits below the index (sans the log2(4 or 8) bits),
+ * and use a mask to clear the log2(4 or 8) bits.  We therefore need
+ * to define 4 shift counts, all of which are relative to PAGE_SHIFT.
+ *
+ * Although unsupportable for other reasons, this does mean that
+ * 512K and 4MB page sizes would be generally supported by the
+ * kernel.  (ELF binaries would break with > 64K PAGE_SIZE since
+ * the sections are only aligned that strongly).
+ *
+ * The operations performed for extraction are thus:
+ *
+ *	((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
+ *
  */
+#define A (3 + (PAGE_SHIFT - 3) - 1)
+#define B (A + 1)
+#define C (B + (PAGE_SHIFT - 2) - 1)
+#define D (C + 1)
+#define E (D + (PAGE_SHIFT - 2) - 1)
+#define F (E + 1)
+
+#define PMD_SHIFT_LEFT		(64 - D)
+#define PMD_SHIFT_RIGHT		(64 - (D - B) - 2)
+#define PGDIR_SHIFT_LEFT	(64 - F)
+#define PGDIR_SHIFT_RIGHT	(64 - (F - D) - 2)
+#define LOW_MASK_BITS		0x3
+
 /* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss	*/
 	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
 	add		%g3, %g3, %g5			! Compute VPTE base
 	cmp		%g4, %g5			! VPTE miss?
 	bgeu,pt		%xcc, 1f			! Continue here
-	 andcc		%g4, TAG_CONTEXT_BITS, %g5	! From Nucleus? (for tl0 miss)
-	ba,pt		%xcc, from_tl1_trap		! Fall to tl0 miss
-	 rdpr		%tl, %g5			! For tl0 miss TL==3 test
+	 andcc		%g4, TAG_CONTEXT_BITS, %g5	! tl0 miss Nucleus test
+	ba,a,pt		%xcc, from_tl1_trap		! Fall to tl0 miss
 1:	sllx		%g6, VPTE_SHIFT, %g4		! Position TAG_ACCESS
+	or		%g4, %g5, %g4			! Prepare TAG_ACCESS
 
 /* TLB1 ** ICACHE line 2: Quick VPTE miss		*/
-	or		%g4, %g5, %g4			! Prepare TAG_ACCESS
 	mov		TSB_REG, %g1			! Grab TSB reg
 	ldxa		[%g1] ASI_DMMU, %g5		! Doing PGD caching?
-	srlx		%g6, (TLB_PMD_SHIFT - 1), %g1	! Position PMD offset
+	sllx		%g6, PMD_SHIFT_LEFT, %g1	! Position PMD offset
 	be,pn		%xcc, sparc64_vpte_nucleus	! Is it from Nucleus?
-	 and		%g1, TLB_PMD_MASK, %g1		! Mask PMD offset bits
+	 srlx		%g1, PMD_SHIFT_RIGHT, %g1	! Mask PMD offset bits
 	brnz,pt		%g5, sparc64_vpte_continue	! Yep, go like smoke
-	 add		%g1, %g1, %g1			! Position PMD offset some more
+	 andn		%g1, LOW_MASK_BITS, %g1		!
Final PMD mask + sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset /* TLB1 ** ICACHE line 3: Quick VPTE miss */ - srlx %g6, (TLB_PGD_SHIFT - 2), %g5 ! Position PGD offset - and %g5, TLB_PGD_MASK, %g5 ! Mask PGD offset + srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits + andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD brz,pn %g5, vpte_noent ! Valid? sparc64_kpte_continue: @@ -71,23 +154,28 @@ sparc64_vpte_continue: brz,pn %g5, vpte_noent ! Valid? /* TLB1 ** ICACHE line 4: Quick VPTE miss */ - FILL_VALID_SZ_BITS1(%g1) ! Put _PAGE_VALID into %g1 - FILL_VALID_SZ_BITS2(%g1) ! Put _PAGE_VALID into %g1 + mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1 + sllx %g1, 61, %g1 ! finish calc or %g5, VPTE_BITS, %g5 ! Prepare VPTE data or %g5, %g1, %g5 ! ... mov TLB_SFSR, %g1 ! Restore %g1 value stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS retry ! Load PTE once again - FILL_VALID_SZ_BITS_NOP +#undef SZ_BITS +#undef VALID_SZ_BITS #undef VPTE_SHIFT -#undef TLB_PMD_SHIFT -#undef TLB_PGD_SHIFT #undef VPTE_BITS -#undef TLB_PMD_MASK -#undef TLB_PGD_MASK -#undef FILL_VALID_SZ_BITS1 -#undef FILL_VALID_SZ_BITS2 -#undef FILL_VALID_SZ_BITS_NOP +#undef A +#undef B +#undef C +#undef D +#undef E +#undef F +#undef PMD_SHIFT_LEFT +#undef PMD_SHIFT_RIGHT +#undef PGDIR_SHIFT_LEFT +#undef PGDIR_SHIFT_RIGHT +#undef LOW_MASK_BITS diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S index 294fb44aeb2c9..ded2fed23fcc5 100644 --- a/arch/sparc64/kernel/dtlb_base.S +++ b/arch/sparc64/kernel/dtlb_base.S @@ -7,7 +7,7 @@ */ #include <asm/pgtable.h> -#include <asm/mmu_context.h> +#include <asm/mmu.h> /* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS) * %g2 (KERN_HIGHBITS | KERN_LOWBITS) @@ -68,8 +68,8 @@ /* DTLB ** ICACHE line 1: Quick user TLB misses */ ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? - mov 1, %g5 ! For TL==3 test from_tl1_trap: + rdpr %tl, %g5 ! For TL==3 test CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset be,pn %xcc, 3f ! Yep, special processing CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index c4b705d0e00ca..a47f2d0b1a29b 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -38,97 +38,150 @@ * range (note that this is only possible for instruction miss, data misses to * obp range do not use vpte). If so, go back directly to the faulting address. * This is because we want to read the tpc, otherwise we have no way of knowing - * the 8k aligned faulting address if we are using >8k kernel pagesize. This also - * ensures no vpte range addresses are dropped into tlb while obp is executing - * (see inherit_locked_prom_mappings() rant). + * the 8k aligned faulting address if we are using >8k kernel pagesize. This + * also ensures no vpte range addresses are dropped into tlb while obp is + * executing (see inherit_locked_prom_mappings() rant). */ sparc64_vpte_nucleus: + /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ mov 0xf, %g5 - sllx %g5, 28, %g5 ! Load 0xf0000000 - cmp %g4, %g5 ! Is addr >= LOW_OBP_ADDRESS? + sllx %g5, 28, %g5 + + /* Is addr >= LOW_OBP_ADDRESS? */ + cmp %g4, %g5 blu,pn %xcc, sparc64_vpte_patchme1 mov 0x1, %g5 - sllx %g5, 32, %g5 ! Load 0x100000000 - cmp %g4, %g5 ! Is addr < HI_OBP_ADDRESS? + + /* Load 0x100000000, which is HI_OBP_ADDRESS. */ + sllx %g5, 32, %g5 + + /* Is addr < HI_OBP_ADDRESS? 
*/ + cmp %g4, %g5 blu,pn %xcc, obp_iaddr_patch nop + + /* These two instructions are patched by paginig_init(). */ sparc64_vpte_patchme1: - sethi %hi(0), %g5 ! This has to be patched + sethi %hi(0), %g5 sparc64_vpte_patchme2: - or %g5, %lo(0), %g5 ! This is patched too - ba,pt %xcc, sparc64_kpte_continue ! Part of dtlb_backend - add %g1, %g1, %g1 ! Finish PMD offset adjustment + or %g5, %lo(0), %g5 + + /* With kernel PGD in %g5, branch back into dtlb_backend. */ + ba,pt %xcc, sparc64_kpte_continue + andn %g1, 0x3, %g1 /* Finish PMD offset adjustment. */ vpte_noent: - mov TLB_SFSR, %g1 ! Restore %g1 value - stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS - done ! Slick trick + /* Restore previous TAG_ACCESS, %g5 is zero, and we will + * skip over the trap instruction so that the top level + * TLB miss handler will thing this %g5 value is just an + * invalid PTE, thus branching to full fault processing. + */ + mov TLB_SFSR, %g1 + stxa %g4, [%g1 + %g1] ASI_DMMU + done .globl obp_iaddr_patch - .globl obp_daddr_patch - obp_iaddr_patch: - sethi %hi(0), %g5 ! This and following is patched - or %g5, %lo(0), %g5 ! g5 now holds obp pmd base physaddr - wrpr %g0, 1, %tl ! Behave as if we are at TL0 - rdpr %tpc, %g4 ! Find original faulting iaddr - srlx %g4, 13, %g4 ! Throw out context bits - sllx %g4, 13, %g4 ! g4 has vpn + ctx0 now - mov TLB_SFSR, %g1 ! Restore %g1 value - stxa %g4, [%g1 + %g1] ASI_IMMU ! Restore previous TAG_ACCESS - srlx %g4, 23, %g6 ! Find pmd number - and %g6, 0x7ff, %g6 ! Find pmd number - sllx %g6, 2, %g6 ! Find pmd offset - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5! Load pmd, ie pagetable physaddr - brz,pn %g5, longpath ! Kill the PROM ? :-) - sllx %g5, 11, %g5 ! Shift into place - srlx %g4, 13, %g6 ! find pte number in pagetable - and %g6, 0x3ff, %g6 ! find pte number in pagetable - sllx %g6, 3, %g6 ! find pte offset in pagetable - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5! Load pte - brgez,pn %g5, longpath ! Kill the PROM ? :-) + /* These two instructions patched by inherit_prom_mappings(). */ + sethi %hi(0), %g5 + or %g5, %lo(0), %g5 + + /* Behave as if we are at TL0. */ + wrpr %g0, 1, %tl + rdpr %tpc, %g4 /* Find original faulting iaddr */ + srlx %g4, 13, %g4 /* Throw out context bits */ + sllx %g4, 13, %g4 /* g4 has vpn + ctx0 now */ + + /* Restore previous TAG_ACCESS. */ + mov TLB_SFSR, %g1 + stxa %g4, [%g1 + %g1] ASI_IMMU + + /* Get PMD offset. */ + srlx %g4, 23, %g6 + and %g6, 0x7ff, %g6 + sllx %g6, 2, %g6 + + /* Load PMD, is it valid? */ + lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 + brz,pn %g5, longpath + sllx %g5, 11, %g5 + + /* Get PTE offset. */ + srlx %g4, 13, %g6 + and %g6, 0x3ff, %g6 + sllx %g6, 3, %g6 + + /* Load PTE. */ + ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 + brgez,pn %g5, longpath nop - stxa %g5, [%g0] ASI_ITLB_DATA_IN ! put into tlb - retry ! go back to original fault + /* TLB load and return from trap. */ + stxa %g5, [%g0] ASI_ITLB_DATA_IN + retry + + .globl obp_daddr_patch obp_daddr_patch: - sethi %hi(0), %g5 ! This and following is patched - or %g5, %lo(0), %g5 ! g5 now holds obp pmd base physaddr - srlx %g4, 23, %g6 ! Find pmd number - and %g6, 0x7ff, %g6 ! Find pmd number - sllx %g6, 2, %g6 ! Find pmd offset - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5! Load pmd, ie pagetable physaddr + /* These two instructions patched by inherit_prom_mappings(). */ + sethi %hi(0), %g5 + or %g5, %lo(0), %g5 + + /* Get PMD offset. */ + srlx %g4, 23, %g6 + and %g6, 0x7ff, %g6 + sllx %g6, 2, %g6 + + /* Load PMD, is it valid? 
*/ + lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 brz,pn %g5, longpath - sllx %g5, 11, %g5 ! Shift into place - srlx %g4, 13, %g6 ! find pte number in pagetable - and %g6, 0x3ff, %g6 ! find pte number in pagetable - sllx %g6, 3, %g6 ! find pte offset in pagetable - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5! Load pte + sllx %g5, 11, %g5 + + /* Get PTE offset. */ + srlx %g4, 13, %g6 + and %g6, 0x3ff, %g6 + sllx %g6, 3, %g6 + + /* Load PTE. */ + ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 brgez,pn %g5, longpath nop - stxa %g5, [%g0] ASI_DTLB_DATA_IN ! put into tlb + + /* TLB load and return from trap. */ + stxa %g5, [%g0] ASI_DTLB_DATA_IN retry /* - * On a first level data miss, check whether this is to the OBP range (note that - * such accesses can be made by prom, as well as by kernel using prom_getproperty - * on "address"), and if so, do not use vpte access ... rather, use information - * saved during inherit_prom_mappings() using 8k pagesize. + * On a first level data miss, check whether this is to the OBP range (note + * that such accesses can be made by prom, as well as by kernel using + * prom_getproperty on "address"), and if so, do not use vpte access ... + * rather, use information saved during inherit_prom_mappings() using 8k + * pagesize. */ kvmap: + /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ mov 0xf, %g5 - sllx %g5, 28, %g5 ! Load 0xf0000000 - cmp %g4, %g5 ! Is addr >= LOW_OBP_ADDRESS? + sllx %g5, 28, %g5 + + /* Is addr >= LOW_OBP_ADDRESS? */ + cmp %g4, %g5 blu,pn %xcc, vmalloc_addr mov 0x1, %g5 - sllx %g5, 32, %g5 ! Load 0x100000000 - cmp %g4, %g5 ! Is addr < HI_OBP_ADDRESS? + + /* Load 0x100000000, which is HI_OBP_ADDRESS. */ + sllx %g5, 32, %g5 + + /* Is addr < HI_OBP_ADDRESS? */ + cmp %g4, %g5 blu,pn %xcc, obp_daddr_patch nop -vmalloc_addr: ! vmalloc addr accessed - ldxa [%g3 + %g6] ASI_N, %g5 ! Yep, load k-vpte - brgez,pn %g5, longpath ! Valid, load into TLB + +vmalloc_addr: + /* If we get here, a vmalloc addr accessed, load kernel VPTE. */ + ldxa [%g3 + %g6] ASI_N, %g5 + brgez,pn %g5, longpath nop + + /* PTE is valid, load into TLB and return from trap. */ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB retry @@ -199,9 +252,11 @@ do_fpdis: faddd %f0, %f2, %f4 fmuld %f0, %f2, %f6 ldxa [%g3] ASI_DMMU, %g5 - add %g6, TI_FPREGS + 0xc0, %g2 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_1: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync + add %g6, TI_FPREGS + 0xc0, %g2 faddd %f0, %f2, %f8 fmuld %f0, %f2, %f10 ldda [%g1] ASI_BLK_S, %f32 ! grrr, where is ASI_BLK_NUCLEUS 8-( @@ -225,7 +280,9 @@ do_fpdis: fzero %f34 ldxa [%g3] ASI_DMMU, %g5 add %g6, TI_FPREGS, %g1 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_2: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync add %g6, TI_FPREGS + 0x40, %g2 faddd %f32, %f34, %f36 @@ -249,9 +306,11 @@ do_fpdis: 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 - mov 0x40, %g2 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_3: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync + mov 0x40, %g2 ldda [%g1] ASI_BLK_S, %f0 ! 
grrr, where is ASI_BLK_NUCLEUS 8-( ldda [%g1 + %g2] ASI_BLK_S, %f16 add %g1, 0x80, %g1 @@ -412,10 +471,12 @@ do_fptrap_after_fsr: rd %gsr, %g3 stx %g3, [%g6 + TI_GSR] mov SECONDARY_CONTEXT, %g3 - add %g6, TI_FPREGS, %g2 ldxa [%g3] ASI_DMMU, %g5 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_4: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync + add %g6, TI_FPREGS, %g2 andcc %g1, FPRS_DL, %g0 be,pn %icc, 4f mov 0x40, %g3 @@ -433,6 +494,33 @@ do_fptrap_after_fsr: ba,pt %xcc, etrap wr %g0, 0, %fprs +cplus_fptrap_1: + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + + .globl cheetah_plus_patch_fpdis +cheetah_plus_patch_fpdis: + /* We configure the dTLB512_0 for 4MB pages and the + * dTLB512_1 for 8K pages when in context zero. + */ + sethi %hi(cplus_fptrap_1), %o0 + lduw [%o0 + %lo(cplus_fptrap_1)], %o1 + + set cplus_fptrap_insn_1, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_2, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_3, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_4, %o2 + stw %o1, [%o2] + flush %o2 + + retl + nop + /* The registers for cross calls will be: * * DATA 0: [low 32-bits] Address of function to call, jmp to this @@ -1642,7 +1730,7 @@ ret_from_syscall: andn %o7, _TIF_NEWCHILD, %l0 stx %l0, [%g6 + TI_FLAGS] call schedule_tail - mov %g5, %o0 + mov %g7, %o0 andcc %l0, _TIF_PERFCTR, %g0 be,pt %icc, 1f nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index d50b755c7e9c3..058c505a32235 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -14,6 +14,7 @@ #include <asm/spitfire.h> #include <asm/head.h> #include <asm/processor.h> +#include <asm/mmu.h> #define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ) #define ETRAP_PSTATE1 (PSTATE_RMO | PSTATE_PRIV) @@ -67,7 +68,13 @@ etrap_irq: wrpr %g3, 0, %otherwin wrpr %g2, 0, %wstate - stxa %g0, [%l4] ASI_DMMU +cplus_etrap_insn_1: + sethi %hi(0), %g3 + sllx %g3, 32, %g3 +cplus_etrap_insn_2: + sethi %hi(0), %g2 + or %g3, %g2, %g3 + stxa %g3, [%l4] ASI_DMMU flush %l6 wr %g0, ASI_AIUS, %asi 2: wrpr %g0, 0x0, %tl @@ -207,7 +214,13 @@ scetrap: rdpr %pil, %g2 mov PRIMARY_CONTEXT, %l4 wrpr %g3, 0, %otherwin wrpr %g2, 0, %wstate - stxa %g0, [%l4] ASI_DMMU +cplus_etrap_insn_3: + sethi %hi(0), %g3 + sllx %g3, 32, %g3 +cplus_etrap_insn_4: + sethi %hi(0), %g2 + or %g3, %g2, %g3 + stxa %g3, [%l4] ASI_DMMU flush %l6 mov ASI_AIUS, %l7 @@ -248,4 +261,38 @@ scetrap: rdpr %pil, %g2 #undef TASK_REGOFF #undef ETRAP_PSTATE1 -#undef ETRAP_PSTATE2 + +cplus_einsn_1: + sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3 +cplus_einsn_2: + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + + .globl cheetah_plus_patch_etrap +cheetah_plus_patch_etrap: + /* We configure the dTLB512_0 for 4MB pages and the + * dTLB512_1 for 8K pages when in context zero. 
+ */ + sethi %hi(cplus_einsn_1), %o0 + sethi %hi(cplus_etrap_insn_1), %o2 + lduw [%o0 + %lo(cplus_einsn_1)], %o1 + or %o2, %lo(cplus_etrap_insn_1), %o2 + stw %o1, [%o2] + flush %o2 + sethi %hi(cplus_etrap_insn_3), %o2 + or %o2, %lo(cplus_etrap_insn_3), %o2 + stw %o1, [%o2] + flush %o2 + + sethi %hi(cplus_einsn_2), %o0 + sethi %hi(cplus_etrap_insn_2), %o2 + lduw [%o0 + %lo(cplus_einsn_2)], %o1 + or %o2, %lo(cplus_etrap_insn_2), %o2 + stw %o1, [%o2] + flush %o2 + sethi %hi(cplus_etrap_insn_4), %o2 + or %o2, %lo(cplus_etrap_insn_4), %o2 + stw %o1, [%o2] + flush %o2 + + retl + nop diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 4a286a8000b07..0d6f58dad2db7 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -25,6 +25,7 @@ #include <asm/dcu.h> #include <asm/head.h> #include <asm/ttable.h> +#include <asm/mmu.h> /* This section from from _start to sparc64_boot_end should fit into * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space @@ -515,14 +516,31 @@ cheetah_tlb_fixup: membar #Sync mov 2, %g2 /* Set TLB type to cheetah+. */ - BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g5,g7,1f) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) mov 1, %g2 /* Set TLB type to cheetah. */ 1: sethi %hi(tlb_type), %g5 stw %g2, [%g5 + %lo(tlb_type)] - /* Patch copy/page operations to cheetah optimized versions. */ + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) + ba,pt %xcc, 2f + nop + +1: /* Patch context register writes to support nucleus page + * size correctly. + */ + call cheetah_plus_patch_etrap + nop + call cheetah_plus_patch_rtrap + nop + call cheetah_plus_patch_fpdis + nop + call cheetah_plus_patch_winfixup + nop + + +2: /* Patch copy/page operations to cheetah optimized versions. */ call cheetah_patch_copyops nop call cheetah_patch_cachetlbops @@ -685,10 +703,23 @@ spitfire_vpte_base: call init_irqwork_curcpu nop - sethi %hi(sparc64_ttable_tl0), %g5 call prom_set_trap_table - mov %g5, %o0 + sethi %hi(sparc64_ttable_tl0), %o0 + + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f) + ba,pt %xcc, 2f + nop +1: /* Start using proper page size encodings in ctx register. 
*/ + sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3 + mov PRIMARY_CONTEXT, %g1 + sllx %g3, 32, %g3 + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + or %g3, %g2, %g3 + stxa %g3, [%g1] ASI_DMMU + membar #Sync + +2: rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 wrpr %o1, 0, %pstate diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index b7c3277bb92ac..42e396112cfaa 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -250,6 +250,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brnz,pn %l3, kern_rtt mov PRIMARY_CONTEXT, %l7 ldxa [%l7 + %l7] ASI_DMMU, %l0 +cplus_rtrap_insn_1: + sethi %hi(0), %l1 + sllx %l1, 32, %l1 + or %l0, %l1, %l0 stxa %l0, [%l7] ASI_DMMU flush %g6 rdpr %wstate, %l1 @@ -298,10 +302,10 @@ kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5 andcc %l2, FPRS_FEF, %g0 be,pn %icc, 5f sll %o0, 3, %o5 - rd %fprs, %g5 + rd %fprs, %g1 - wr %g5, FPRS_FEF, %fprs - ldx [%o1 + %o5], %g5 + wr %g1, FPRS_FEF, %fprs + ldx [%o1 + %o5], %g1 add %g6, TI_XFSR, %o1 membar #StoreLoad | #LoadLoad sll %o0, 8, %o2 @@ -313,7 +317,7 @@ kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5 ldda [%o4 + %o2] ASI_BLK_P, %f16 1: andcc %l2, FPRS_DU, %g0 be,pn %icc, 1f - wr %g5, 0, %gsr + wr %g1, 0, %gsr add %o2, 0x80, %o2 ldda [%o3 + %o2] ASI_BLK_P, %f32 ldda [%o4 + %o2] ASI_BLK_P, %f48 @@ -335,3 +339,21 @@ kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5 wr %g0, FPRS_DU, %fprs ba,pt %xcc, rt_continue stb %l5, [%g6 + TI_FPDEPTH] + +cplus_rinsn_1: + sethi %uhi(CTX_CHEETAH_PLUS_NUC), %l1 + + .globl cheetah_plus_patch_rtrap +cheetah_plus_patch_rtrap: + /* We configure the dTLB512_0 for 4MB pages and the + * dTLB512_1 for 8K pages when in context zero. + */ + sethi %hi(cplus_rinsn_1), %o0 + sethi %hi(cplus_rtrap_insn_1), %o2 + lduw [%o0 + %lo(cplus_rinsn_1)], %o1 + or %o2, %lo(cplus_rtrap_insn_1), %o2 + stw %o1, [%o2] + flush %o2 + + retl + nop diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 9ddfcb9a19001..8094808d5ba5c 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -65,30 +65,25 @@ void up(struct semaphore *sem) __asm__ __volatile__("\n" " ! up sem(%0)\n" " membar #StoreLoad | #LoadLoad\n" -"1: lduw [%0], %%g5\n" -" add %%g5, 1, %%g7\n" -" cas [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: lduw [%0], %%g1\n" +" add %%g1, 1, %%g7\n" +" cas [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " addcc %%g7, 1, %%g0\n" " ble,pn %%icc, 3f\n" " membar #StoreLoad | #StoreStore\n" "2:\n" " .subsection 2\n" -"3: mov %0, %%g5\n" +"3: mov %0, %%g1\n" " save %%sp, -160, %%sp\n" -" mov %%g1, %%l1\n" -" mov %%g2, %%l2\n" -" mov %%g3, %%l3\n" " call %1\n" -" mov %%g5, %%o0\n" -" mov %%l1, %%g1\n" -" mov %%l2, %%g2\n" +" mov %%g1, %%o0\n" " ba,pt %%xcc, 2b\n" -" restore %%l3, %%g0, %%g3\n" +" restore\n" " .previous\n" : : "r" (sem), "i" (__up) - : "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); } static void __sched __down(struct semaphore * sem) @@ -127,30 +122,25 @@ void __sched down(struct semaphore *sem) __asm__ __volatile__("\n" " ! 
down sem(%0)\n" -"1: lduw [%0], %%g5\n" -" sub %%g5, 1, %%g7\n" -" cas [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: lduw [%0], %%g1\n" +" sub %%g1, 1, %%g7\n" +" cas [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" " bl,pn %%icc, 3f\n" " membar #StoreLoad | #StoreStore\n" "2:\n" " .subsection 2\n" -"3: mov %0, %%g5\n" +"3: mov %0, %%g1\n" " save %%sp, -160, %%sp\n" -" mov %%g1, %%l1\n" -" mov %%g2, %%l2\n" -" mov %%g3, %%l3\n" " call %1\n" -" mov %%g5, %%o0\n" -" mov %%l1, %%g1\n" -" mov %%l2, %%g2\n" +" mov %%g1, %%o0\n" " ba,pt %%xcc, 2b\n" -" restore %%l3, %%g0, %%g3\n" +" restore\n" " .previous\n" : : "r" (sem), "i" (__down) - : "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); } int down_trylock(struct semaphore *sem) @@ -175,20 +165,20 @@ int down_trylock(struct semaphore *sem) __asm__ __volatile__("\n" " ! down_trylock sem(%1) ret(%0)\n" -"1: lduw [%1], %%g5\n" -" sub %%g5, 1, %%g7\n" -" cmp %%g5, 1\n" +"1: lduw [%1], %%g1\n" +" sub %%g1, 1, %%g7\n" +" cmp %%g1, 1\n" " bl,pn %%icc, 2f\n" " mov 1, %0\n" -" cas [%1], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +" cas [%1], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " mov 0, %0\n" " membar #StoreLoad | #StoreStore\n" "2:\n" : "=&r" (ret) : "r" (sem) - : "g5", "g7", "memory", "cc"); + : "g1", "g7", "memory", "cc"); return ret; } @@ -237,31 +227,25 @@ int __sched down_interruptible(struct semaphore *sem) __asm__ __volatile__("\n" " ! down_interruptible sem(%2) ret(%0)\n" -"1: lduw [%2], %%g5\n" -" sub %%g5, 1, %%g7\n" -" cas [%2], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: lduw [%2], %%g1\n" +" sub %%g1, 1, %%g7\n" +" cas [%2], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" " bl,pn %%icc, 3f\n" " membar #StoreLoad | #StoreStore\n" "2:\n" " .subsection 2\n" -"3: mov %2, %%g5\n" +"3: mov %2, %%g1\n" " save %%sp, -160, %%sp\n" -" mov %%g1, %%l1\n" -" mov %%g2, %%l2\n" -" mov %%g3, %%l3\n" " call %3\n" -" mov %%g5, %%o0\n" -" mov %%l1, %%g1\n" -" mov %%l2, %%g2\n" -" mov %%l3, %%g3\n" +" mov %%g1, %%o0\n" " ba,pt %%xcc, 2b\n" -" restore %%o0, %%g0, %0\n" +" restore\n" " .previous\n" : "=r" (ret) : "0" (ret), "r" (sem), "i" (__down_interruptible) - : "g5", "g7", "memory", "cc"); + : "g1", "g2", "g3", "g5", "g7", "memory", "cc"); return ret; } diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 0c9ce2bb5100a..12c3d84b7460c 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -47,6 +47,7 @@ #include <asm/timer.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/mmu.h> #ifdef CONFIG_IP_PNP #include <net/ipconfig.h> @@ -157,11 +158,11 @@ int prom_callback(long *args) for_each_process(p) { mm = p->mm; - if (CTX_HWBITS(mm->context) == ctx) + if (CTX_NRBITS(mm->context) == ctx) break; } if (!mm || - CTX_HWBITS(mm->context) != ctx) + CTX_NRBITS(mm->context) != ctx) goto done; pgdp = pgd_offset(mm, va); @@ -187,12 +188,19 @@ int prom_callback(long *args) } if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) { + unsigned long kernel_pctx = 0; + + if (tlb_type == cheetah_plus) + kernel_pctx |= (CTX_CHEETAH_PLUS_NUC | + CTX_CHEETAH_PLUS_CTX0); + /* Spitfire Errata #32 workaround */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + : "r" (kernel_pctx), + "r" (PRIMARY_CONTEXT), + "i" (ASI_DMMU)); /* * Locked down tlb entry. 
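A note before the smp.c hunk: the semaphore.c changes above only move the fast path off of %g5 onto %g1 and let the slow path clobber the globals via the widened clobber lists; the algorithm itself, a compare-and-swap retry loop, is unchanged. For readers who do not speak SPARC inline asm, a rough C11 analogue of the up() fast path (illustrative only; the structure layout and the __up_slow() name are stand-ins for the kernel's real types and its __up() slow path):

	#include <stdatomic.h>

	struct sem { atomic_int count; };

	void __up_slow(struct sem *s);	/* stand-in for the kernel's __up() */

	void up_sketch(struct sem *s)
	{
		int old = atomic_load_explicit(&s->count,
					       memory_order_relaxed);

		/* The lduw / add / cas / cmp / bne retry loop: a failed
		 * CAS reloads 'old' with the current counter value. */
		while (!atomic_compare_exchange_weak(&s->count, &old, old + 1))
			;

		/* addcc %g7, 1; ble 3f: if the counter was negative
		 * before the increment, sleepers may exist, so fall
		 * into the slow wakeup path. */
		if (old + 1 <= 0)
			__up_slow(s);
	}

The register swap matters because %g1 through %g3 are scratch across the slow-path call anyway once it is allowed to clobber them, which is what lets the patch delete the save/restore shuffle through %l1, %l2, and %l3.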
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 1441ef81b8abe..1cbc02aa27dd4 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -89,7 +89,6 @@ void __init smp_store_cpu_info(int id) cpu_data(id).pgcache_size = 0; cpu_data(id).pte_cache[0] = NULL; cpu_data(id).pte_cache[1] = NULL; - cpu_data(id).pgdcache_size = 0; cpu_data(id).pgd_cache = NULL; cpu_data(id).idle_volume = 1; } @@ -627,7 +626,10 @@ extern unsigned long xcall_flush_tlb_all_spitfire; extern unsigned long xcall_flush_tlb_all_cheetah; extern unsigned long xcall_report_regs; extern unsigned long xcall_receive_signal; + +#ifdef DCACHE_ALIASING_POSSIBLE extern unsigned long xcall_flush_dcache_page_cheetah; +#endif extern unsigned long xcall_flush_dcache_page_spitfire; #ifdef CONFIG_DEBUG_DCFLUSH @@ -637,7 +639,7 @@ extern atomic_t dcpage_flushes_xcall; static __inline__ void __local_flush_dcache_page(struct page *page) { -#if (L1DCACHE_SIZE > PAGE_SIZE) +#ifdef DCACHE_ALIASING_POSSIBLE __flush_dcache_page(page_address(page), ((tlb_type == spitfire) && page_mapping(page) != NULL)); @@ -672,11 +674,13 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) (u64) pg_addr, mask); } else { +#ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, __pa(pg_addr), 0, mask); +#endif } #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); @@ -709,10 +713,12 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) (u64) pg_addr, mask); } else { +#ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, __pa(pg_addr), 0, mask); +#endif } #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); @@ -1055,74 +1061,6 @@ void __init smp_tick_init(void) prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1; } -extern unsigned long cheetah_tune_scheduling(void); - -static void __init smp_tune_scheduling(void) -{ - unsigned long orig_flush_base, flush_base, flags, *p; - unsigned int ecache_size, order; - cycles_t tick1, tick2, raw; - int cpu_node; - - /* Approximate heuristic for SMP scheduling. It is an - * estimation of the time it takes to flush the L2 cache - * on the local processor. - * - * The ia32 chooses to use the L1 cache flush time instead, - * and I consider this complete nonsense. The Ultra can service - * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and - * L2 misses are what create extra bus traffic (ie. the "cost" - * of moving a process from one cpu to another). - */ - printk("SMP: Calibrating ecache flush... "); - if (tlb_type == cheetah || tlb_type == cheetah_plus) - return; - - cpu_find_by_instance(0, &cpu_node, NULL); - ecache_size = prom_getintdefault(cpu_node, - "ecache-size", (512 * 1024)); - if (ecache_size > (4 * 1024 * 1024)) - ecache_size = (4 * 1024 * 1024); - orig_flush_base = flush_base = - __get_free_pages(GFP_KERNEL, order = get_order(ecache_size)); - - if (flush_base != 0UL) { - local_irq_save(flags); - - /* Scan twice the size once just to get the TLB entries - * loaded and make sure the second scan measures pure misses. 
- */ - for (p = (unsigned long *)flush_base; - ((unsigned long)p) < (flush_base + (ecache_size<<1)); - p += (64 / sizeof(unsigned long))) - *((volatile unsigned long *)p); - - tick1 = tick_ops->get_tick(); - - __asm__ __volatile__("1:\n\t" - "ldx [%0 + 0x000], %%g1\n\t" - "ldx [%0 + 0x040], %%g2\n\t" - "ldx [%0 + 0x080], %%g3\n\t" - "ldx [%0 + 0x0c0], %%g5\n\t" - "add %0, 0x100, %0\n\t" - "cmp %0, %2\n\t" - "bne,pt %%xcc, 1b\n\t" - " nop" - : "=&r" (flush_base) - : "0" (flush_base), - "r" (flush_base + ecache_size) - : "g1", "g2", "g3", "g5"); - - tick2 = tick_ops->get_tick(); - - local_irq_restore(flags); - - raw = (tick2 - tick1); - - free_pages(orig_flush_base, order); - } -} - /* /proc/profile writes can call this, don't __init it please. */ static DEFINE_SPINLOCK(prof_setup_lock); @@ -1212,11 +1150,6 @@ void __init smp_cpus_done(unsigned int max_cpus) (long) num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); - - /* We want to run this with all the other cpus spinning - * in the kernel. - */ - smp_tune_scheduling(); } /* This needn't do anything as we do not sleep the cpu diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 3cec1ebb083b0..2f7a028dc37c2 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -59,6 +59,7 @@ #include <asm/ns87303.h> #include <asm/timer.h> #include <asm/cpudata.h> +#include <asm/rwsem.h> struct poll { int fd; @@ -174,6 +175,15 @@ EXPORT_SYMBOL(down_trylock); EXPORT_SYMBOL(down_interruptible); EXPORT_SYMBOL(up); +/* RW semaphores */ +EXPORT_SYMBOL(__down_read); +EXPORT_SYMBOL(__down_read_trylock); +EXPORT_SYMBOL(__down_write); +EXPORT_SYMBOL(__down_write_trylock); +EXPORT_SYMBOL(__up_read); +EXPORT_SYMBOL(__up_write); +EXPORT_SYMBOL(__downgrade_write); + /* Atomic counter implementation. */ EXPORT_SYMBOL(atomic_add); EXPORT_SYMBOL(atomic_add_ret); @@ -209,8 +219,11 @@ EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL(tlb_type); EXPORT_SYMBOL(get_fb_unmapped_area); EXPORT_SYMBOL(flush_icache_range); + EXPORT_SYMBOL(flush_dcache_page); +#ifdef DCACHE_ALIASING_POSSIBLE EXPORT_SYMBOL(__flush_dcache_range); +#endif EXPORT_SYMBOL(mostek_lock); EXPORT_SYMBOL(mstk48t02_regs); diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index a9fa9a47074d8..567c91c77b20e 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -264,7 +264,7 @@ asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compa switch (call) { case SEMTIMEDOP: - if (third) + if (fifth) /* sign extend semid */ return compat_sys_semtimedop((int)first, compat_ptr(ptr), second, diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index f1d764b2d39b5..2c8f9344b4eea 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -15,6 +15,7 @@ #include <asm/spitfire.h> #include <asm/processor.h> #include <asm/thread_info.h> +#include <asm/mmu.h> .data .align 8 @@ -334,6 +335,20 @@ do_unlock: call init_irqwork_curcpu nop + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f) + ba,pt %xcc, 2f + nop + +1: /* Start using proper page size encodings in ctx register. 
*/ + sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3 + mov PRIMARY_CONTEXT, %g1 + sllx %g3, 32, %g3 + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + or %g3, %g2, %g3 + stxa %g3, [%g1] ASI_DMMU + membar #Sync + +2: rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 wrpr %o1, 0, %pstate diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 7d0e96f00bd00..56b203a2af696 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -806,48 +806,6 @@ static void cheetah_flush_ecache_line(unsigned long physaddr) "i" (ASI_PHYS_USE_EC)); } -#ifdef CONFIG_SMP -unsigned long __init cheetah_tune_scheduling(void) -{ - unsigned long tick1, tick2, raw; - unsigned long flush_base = ecache_flush_physbase; - unsigned long flush_linesize = ecache_flush_linesize; - unsigned long flush_size = ecache_flush_size; - - /* Run through the whole cache to guarantee the timed loop - * is really displacing cache lines. - */ - __asm__ __volatile__("1: subcc %0, %4, %0\n\t" - " bne,pt %%xcc, 1b\n\t" - " ldxa [%2 + %0] %3, %%g0\n\t" - : "=&r" (flush_size) - : "0" (flush_size), "r" (flush_base), - "i" (ASI_PHYS_USE_EC), "r" (flush_linesize)); - - /* The flush area is 2 X Ecache-size, so cut this in half for - * the timed loop. - */ - flush_base = ecache_flush_physbase; - flush_linesize = ecache_flush_linesize; - flush_size = ecache_flush_size >> 1; - - tick1 = tick_ops->get_tick(); - - __asm__ __volatile__("1: subcc %0, %4, %0\n\t" - " bne,pt %%xcc, 1b\n\t" - " ldxa [%2 + %0] %3, %%g0\n\t" - : "=&r" (flush_size) - : "0" (flush_size), "r" (flush_base), - "i" (ASI_PHYS_USE_EC), "r" (flush_linesize)); - - tick2 = tick_ops->get_tick(); - - raw = (tick2 - tick1); - - return (raw - (raw >> 2)); -} -#endif - /* Unfortunately, the diagnostic access to the I-cache tags we need to * use to clear the thing interferes with I-cache coherency transactions. * diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 8a9d3b6bfe5c9..82a29dae65ee7 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -379,8 +379,8 @@ void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) printk(KERN_ALERT "Unable to handle kernel paging request in mna handler"); printk(KERN_ALERT " at virtual address %016lx\n",address); printk(KERN_ALERT "current->{mm,active_mm}->context = %016lx\n", - (current->mm ? current->mm->context : - current->active_mm->context)); + (current->mm ? CTX_HWBITS(current->mm->context) : + CTX_HWBITS(current->active_mm->context))); printk(KERN_ALERT "current->{mm,active_mm}->pgd = %016lx\n", (current->mm ? (unsigned long) current->mm->pgd : (unsigned long) current->active_mm->pgd)); diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 3427d7a743e1f..74a06bef748b6 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -14,6 +14,25 @@ #include <asm/thread_info.h> .text + +set_pcontext: +cplus_winfixup_insn_1: + sethi %hi(0), %l1 + mov PRIMARY_CONTEXT, %g1 + sllx %l1, 32, %l1 +cplus_winfixup_insn_2: + sethi %hi(0), %g2 + or %l1, %g2, %l1 + stxa %l1, [%g1] ASI_DMMU + flush %g6 + retl + nop + +cplus_wfinsn_1: + sethi %uhi(CTX_CHEETAH_PLUS_NUC), %l1 +cplus_wfinsn_2: + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + .align 32 /* Here are the rules, pay attention. @@ -62,9 +81,8 @@ fill_fixup: wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. - mov PRIMARY_CONTEXT, %g1 ! Change contexts... - stxa %g0, [%g1] ASI_DMMU ! 
Back into the nucleus. - flush %g6 ! Flush instruction buffers + call set_pcontext ! Change contexts... + nop rdpr %pstate, %l1 ! Prepare to change globals. mov %g6, %o7 ! Get current. @@ -183,9 +201,8 @@ fill_fixup_mna: wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. - mov PRIMARY_CONTEXT, %g1 ! Change contexts... - stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus. - flush %g6 ! Flush instruction buffers + call set_pcontext ! Change contexts... + nop rdpr %pstate, %l1 ! Prepare to change globals. mov %g4, %o2 ! Setup args for mov %g5, %o1 ! final call to mem_address_unaligned. @@ -289,9 +306,8 @@ fill_fixup_dax: wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. - mov PRIMARY_CONTEXT, %g1 ! Change contexts... - stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus. - flush %g6 ! Flush instruction buffers + call set_pcontext ! Change contexts... + nop rdpr %pstate, %l1 ! Prepare to change globals. mov %g4, %o1 ! Setup args for mov %g5, %o2 ! final call to data_access_exception. @@ -368,3 +384,22 @@ window_dax_from_user_common: ba,pt %xcc, rtrap clr %l6 + + .globl cheetah_plus_patch_winfixup +cheetah_plus_patch_winfixup: + sethi %hi(cplus_wfinsn_1), %o0 + sethi %hi(cplus_winfixup_insn_1), %o2 + lduw [%o0 + %lo(cplus_wfinsn_1)], %o1 + or %o2, %lo(cplus_winfixup_insn_1), %o2 + stw %o1, [%o2] + flush %o2 + + sethi %hi(cplus_wfinsn_2), %o0 + sethi %hi(cplus_winfixup_insn_2), %o2 + lduw [%o0 + %lo(cplus_wfinsn_2)], %o1 + or %o2, %lo(cplus_winfixup_insn_2), %o2 + stw %o1, [%o2] + flush %o2 + + retl + nop diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 3cf408cb1695e..d58575a978d5b 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS := -Werror lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \ - VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ + bzero.o VIScsum.o VIScsumcopy.o \ VIScsumcopyusr.o VISsave.o atomic.o bitops.o \ U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index fffec2e3cef8e..ec0788236a816 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/visasm.h> #include <asm/asi.h> +#define GLOBAL_SPARE %g7 #else #define ASI_BLK_P 0xf0 #define FPRS_FEF 0x04 @@ -18,6 +19,7 @@ #define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs #define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #endif +#define GLOBAL_SPARE %g5 #endif #ifndef EX_LD @@ -123,7 +125,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, %g5 + mov %o0, %o4 cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -146,7 +148,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * of bytes to copy to make 'dst' 64-byte aligned. We pre- * subtract this from 'len'. 
*/ - sub %o0, %o1, %o4 + sub %o0, %o1, GLOBAL_SPARE sub %g2, 0x40, %g2 sub %g0, %g2, %g2 sub %o2, %g2, %o2 @@ -156,11 +158,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 0x1, %g1 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + %o4)) + EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x1, %o1 - add %o1, %o4, %o0 + add %o1, GLOBAL_SPARE, %o0 2: cmp %g2, 0x0 and %o1, 0x7, %g1 @@ -188,19 +190,19 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 3: membar #LoadStore | #StoreStore | #StoreLoad - subcc %o2, 0x40, %o4 + subcc %o2, 0x40, GLOBAL_SPARE add %o1, %g1, %g1 - andncc %o4, (0x40 - 1), %o4 + andncc GLOBAL_SPARE, (0x40 - 1), GLOBAL_SPARE srl %g1, 3, %g2 - sub %o2, %o4, %g3 + sub %o2, GLOBAL_SPARE, %g3 andn %o1, (0x40 - 1), %o1 and %g2, 7, %g2 andncc %g3, 0x7, %g3 fmovd %f0, %f2 sub %g3, 0x8, %g3 - sub %o2, %o4, %o2 + sub %o2, GLOBAL_SPARE, %o2 - add %g1, %o4, %g1 + add %g1, GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 EX_LD(LOAD_BLK(%o1, %f0)) @@ -208,7 +210,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %g3, %g1 EX_LD(LOAD_BLK(%o1, %f16)) add %o1, 0x40, %o1 - sub %o4, 0x80, %o4 + sub GLOBAL_SPARE, 0x80, GLOBAL_SPARE EX_LD(LOAD_BLK(%o1, %f32)) add %o1, 0x40, %o1 @@ -449,18 +451,18 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 2: membar #StoreLoad | #StoreStore VISExit retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 64 70: /* 16 < len <= (5 * 64) */ bne,pn %XCC, 75f sub %o0, %o1, %o3 -72: andn %o2, 0xf, %o4 +72: andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - subcc %o4, 0x10, %o4 + subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE EX_ST(STORE(stx, %o5, %o1 + %o3)) add %o1, 0x8, %o1 EX_ST(STORE(stx, %g1, %o1 + %o3)) @@ -512,10 +514,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 + andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) - subcc %o4, 0x8, %o4 + subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 @@ -544,7 +546,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 4, %o1 85: retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 32 90: EX_LD(LOAD(ldub, %o1, %g1)) @@ -553,6 +555,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/U3memcpy.S b/arch/sparc64/lib/U3memcpy.S index 8fe195a10bbad..7cae9cc6a204a 100644 --- a/arch/sparc64/lib/U3memcpy.S +++ b/arch/sparc64/lib/U3memcpy.S @@ -6,6 +6,7 @@ #ifdef __KERNEL__ #include <asm/visasm.h> #include <asm/asi.h> +#define GLOBAL_SPARE %g7 #else #define ASI_BLK_P 0xf0 #define FPRS_FEF 0x04 @@ -17,6 +18,7 @@ #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #endif +#define GLOBAL_SPARE %g5 #endif #ifndef EX_LD @@ -84,7 +86,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, %g5 + mov %o0, %o4 cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -109,7 +111,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * of bytes to copy to make 'dst' 64-byte aligned. We pre- * subtract this from 'len'. 
*/ - sub %o0, %o1, %o4 + sub %o0, %o1, GLOBAL_SPARE sub %g2, 0x40, %g2 sub %g0, %g2, %g2 sub %o2, %g2, %o2 @@ -119,11 +121,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 0x1, %g1 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + %o4)) + EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x1, %o1 - add %o1, %o4, %o0 + add %o1, GLOBAL_SPARE, %o0 2: cmp %g2, 0x0 and %o1, 0x7, %g1 @@ -149,7 +151,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 3: LOAD(prefetch, %o1 + 0x000, #one_read) LOAD(prefetch, %o1 + 0x040, #one_read) - andn %o2, (0x40 - 1), %o4 + andn %o2, (0x40 - 1), GLOBAL_SPARE LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) @@ -173,10 +175,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) - subcc %o4, 0x80, %o4 + subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 bgu,pt %XCC, 1f - srl %o4, 6, %o3 + srl GLOBAL_SPARE, 6, %o3 ba,pt %xcc, 2f nop @@ -315,9 +317,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o0, %o1, %o3 72: - andn %o2, 0xf, %o4 + andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 -1: subcc %o4, 0x10, %o4 +1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) EX_ST(STORE(stx, %o5, %o1 + %o3)) @@ -372,10 +374,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 + andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) - subcc %o4, 0x8, %o4 + subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 @@ -405,7 +407,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 4, %o1 85: retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 32 90: @@ -415,6 +417,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/VISbzero.S b/arch/sparc64/lib/VISbzero.S deleted file mode 100644 index 06b697bab974b..0000000000000 --- a/arch/sparc64/lib/VISbzero.S +++ /dev/null @@ -1,274 +0,0 @@ -/* $Id: VISbzero.S,v 1.11 2001/03/15 08:51:24 anton Exp $ - * VISbzero.S: High speed clear operations utilizing the UltraSparc - * Visual Instruction Set. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include "VIS.h" - -#ifdef __KERNEL__ -#include <asm/visasm.h> - -#define EXN(x,y,a,b,z) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: ba VISbzerofixup_ret##z; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXC(x,y,a,b,c...) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: c; \ - ba VISbzerofixup_ret0; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXO1(x,y) \ -98: x,y; \ - .section __ex_table; \ - .align 4; \ - .word 98b, VISbzerofixup_reto1; \ - .text; \ - .align 4; -#define EX(x,y,a,b) EXN(x,y,a,b,0) -#define EX1(x,y,a,b) EXN(x,y,a,b,1) -#define EX2(x,y,a,b) EXN(x,y,a,b,2) -#define EXT(start,end,handler) \ - .section __ex_table; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4 -#else -#define EX(x,y,a,b) x,y -#define EX1(x,y,a,b) x,y -#define EX2(x,y,a,b) x,y -#define EXC(x,y,a,b,c...) 
x,y -#define EXO1(x,y) x,y -#define EXT(a,b,c) -#endif - -#define ZERO_BLOCKS(base, offset, source) \ - STX source, [base - offset - 0x38] ASINORMAL; \ - STX source, [base - offset - 0x30] ASINORMAL; \ - STX source, [base - offset - 0x28] ASINORMAL; \ - STX source, [base - offset - 0x20] ASINORMAL; \ - STX source, [base - offset - 0x18] ASINORMAL; \ - STX source, [base - offset - 0x10] ASINORMAL; \ - STX source, [base - offset - 0x08] ASINORMAL; \ - STX source, [base - offset - 0x00] ASINORMAL; - -#ifdef __KERNEL__ -#define RETL clr %o0 -#else -#define RETL mov %g3, %o0 -#endif - - /* Well, bzero is a lot easier to get right than bcopy... */ -#ifdef __KERNEL__ - .section __ex_table,#alloc - .section .fixup,#alloc,#execinstr -#endif - .text - .align 32 -#ifdef __KERNEL__ - .globl __bzero, __bzero_noasi -__bzero_noasi: - rd %asi, %g5 - ba,pt %xcc, __bzero+12 - mov %g5, %o4 -__bzero: - rd %asi, %g5 - wr %g0, ASI_P, %asi ! LSU Group - mov ASI_P, %o4 -#else - .globl bzero -bzero_private: -bzero: -#ifndef REGS_64BIT - srl %o1, 0, %o1 -#endif - mov %o0, %g3 -#endif - cmp %o1, 7 - bleu,pn %xcc, 17f - andcc %o0, 3, %o2 - be,a,pt %xcc, 4f - andcc %o0, 4, %g0 - cmp %o2, 3 - be,pn %xcc, 2f - EXO1(STB %g0, [%o0 + 0x00] ASINORMAL) - cmp %o2, 2 - be,pt %xcc, 2f - EX(STB %g0, [%o0 + 0x01] ASINORMAL, sub %o1, 1) - EX(STB %g0, [%o0 + 0x02] ASINORMAL, sub %o1, 2) -2: sub %o2, 4, %o2 - sub %o0, %o2, %o0 - add %o1, %o2, %o1 - andcc %o0, 4, %g0 -4: be,pt %xcc, 2f - cmp %o1, 128 - EXO1(STW %g0, [%o0] ASINORMAL) - sub %o1, 4, %o1 - add %o0, 4, %o0 -2: blu,pn %xcc, 9f - andcc %o0, 0x38, %o2 - be,pn %icc, 6f - mov 64, %o5 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o5, %o2, %o5 - EX(STX %g0, [%o0] ASINORMAL, sub %o1, 0) - add %o0, 8, %o0 -1: andcc %o5, 16, %g0 - be,pn %icc, 1f - sub %o1, %o5, %o1 - EX1(STX %g0, [%o0] ASINORMAL, add %g0, 0) - EX1(STX %g0, [%o0 + 8] ASINORMAL, sub %g0, 8) - add %o0, 16, %o0 -1: andcc %o5, 32, %g0 - be,pn %icc, 7f - andncc %o1, 0x3f, %o3 - EX(STX %g0, [%o0] ASINORMAL, add %o1, 32) - EX(STX %g0, [%o0 + 8] ASINORMAL, add %o1, 24) - EX(STX %g0, [%o0 + 16] ASINORMAL, add %o1, 16) - EX(STX %g0, [%o0 + 24] ASINORMAL, add %o1, 8) - add %o0, 32, %o0 -6: andncc %o1, 0x3f, %o3 -7: be,pn %xcc, 9f -#ifdef __KERNEL__ - or %o4, ASI_BLK_OR, %g7 - wr %g7, %g0, %asi - VISEntryHalf -#else - wr %g0, ASI_BLK_P, %asi -#endif - membar #StoreLoad | #StoreStore | #LoadStore - fzero %f0 - andcc %o3, 0xc0, %o2 - and %o1, 0x3f, %o1 - fzero %f2 - andn %o3, 0xff, %o3 - faddd %f0, %f2, %f4 - fmuld %f0, %f2, %f6 - cmp %o2, 64 - faddd %f0, %f2, %f8 - fmuld %f0, %f2, %f10 - faddd %f0, %f2, %f12 - brz,pn %o2, 10f - fmuld %f0, %f2, %f14 - be,pn %icc, 2f - EXC(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o2, add %o2, %o1, %o2) - cmp %o2, 128 - be,pn %icc, 2f - EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 64, %o2) - EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 128, %o2) -2: brz,pn %o3, 12f - add %o0, %o2, %o0 -10: EX(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o1) - EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o1, sub %o1, 64, %o1) - EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o1, sub %o1, 128, %o1) - EXC(STBLK %f0, [%o0 + 0xc0] ASIBLK, add %o3, %o1, sub %o1, 192, %o1) -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: -#ifdef __KERNEL__ - VISExitHalf - wr %o4, 0x0, %asi -#else -#ifndef REGS_64BIT - wr %g0, FPRS_FEF, %fprs -#endif -#endif - membar #StoreLoad | #StoreStore -9: andcc %o1, 0xf8, %o2 - be,pn %xcc, 13f - andcc %o1, 7, %o1 
-#ifdef __KERNEL__ -14: sethi %hi(13f), %o4 - srl %o2, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + %lo(13f), %g0 - add %o0, %o2, %o0 -#else -14: rd %pc, %o4 - srl %o2, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + (13f - 14b), %g0 - add %o0, %o2, %o0 -#endif -12: ZERO_BLOCKS(%o0, 0xc8, %g0) - ZERO_BLOCKS(%o0, 0x88, %g0) - ZERO_BLOCKS(%o0, 0x48, %g0) - ZERO_BLOCKS(%o0, 0x08, %g0) - EXT(12b,13f,VISbzerofixup_zb) -13: be,pn %xcc, 8f - andcc %o1, 4, %g0 - be,pn %xcc, 1f - andcc %o1, 2, %g0 - EX(STW %g0, [%o0] ASINORMAL, and %o1, 7) - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o1, 1, %g0 - EX(STH %g0, [%o0] ASINORMAL, and %o1, 3) - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - EX(STB %g0, [%o0] ASINORMAL, add %g0, 1) -8: -#ifdef __KERNEL__ - wr %g5, %g0, %asi -#endif - retl - RETL -17: be,pn %xcc, 13b - orcc %o1, 0, %g0 - be,pn %xcc, 0f -8: add %o0, 1, %o0 - subcc %o1, 1, %o1 - bne,pt %xcc, 8b - EX(STB %g0, [%o0 - 1] ASINORMAL, add %o1, 1) -0: -#ifdef __KERNEL__ - wr %g5, %g0, %asi -#endif - retl - RETL - -#ifdef __KERNEL__ - .section .fixup - .align 4 -VISbzerofixup_reto1: - mov %o1, %o0 -VISbzerofixup_ret0: - wr %g5, %g0, %asi - retl - wr %g0, 0, %fprs -VISbzerofixup_ret1: - and %o5, 0x30, %o5 - add %o5, %o1, %o5 - ba,pt %xcc, VISbzerofixup_ret0 - add %o0, %o5, %o0 -VISbzerofixup_ret2: - and %o5, 0x20, %o5 - add %o5, %o1, %o5 - ba,pt %xcc, VISbzerofixup_ret0 - add %o0, %o5, %o0 -VISbzerofixup_zb: - andcc %o1, 7, %o1 - sll %g2, 3, %g2 - add %o1, 256, %o1 - ba,pt %xcc, VISbzerofixup_ret0 - sub %o1, %g2, %o0 -#endif diff --git a/arch/sparc64/lib/VISmemset.S b/arch/sparc64/lib/VISmemset.S deleted file mode 100644 index 152723a490141..0000000000000 --- a/arch/sparc64/lib/VISmemset.S +++ /dev/null @@ -1,240 +0,0 @@ -/* $Id: VISmemset.S,v 1.10 1999/12/23 17:02:16 jj Exp $ - * VISmemset.S: High speed memset operations utilizing the UltraSparc - * Visual Instruction Set. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include "VIS.h" - -#ifdef REGS_64BIT -#define SET_BLOCKS(base, offset, source) \ - stx source, [base - offset - 0x18]; \ - stx source, [base - offset - 0x10]; \ - stx source, [base - offset - 0x08]; \ - stx source, [base - offset - 0x00]; -#else -#define SET_BLOCKS(base, offset, source) \ - stw source, [base - offset - 0x18]; \ - stw source, [base - offset - 0x14]; \ - stw source, [base - offset - 0x10]; \ - stw source, [base - offset - 0x0c]; \ - stw source, [base - offset - 0x08]; \ - stw source, [base - offset - 0x04]; \ - stw source, [base - offset - 0x00]; \ - stw source, [base - offset + 0x04]; -#endif - -#ifndef __KERNEL__ -/* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */ -#include "VISbzero.S" -#endif - -#ifdef __KERNEL__ -#include <asm/visasm.h> -#endif - - /* Well, memset is a lot easier to get right than bcopy... 
*/ - .text - .align 32 -#ifdef __KERNEL__ - .globl __memset -__memset: -#endif - .globl memset -memset: -#ifndef __KERNEL__ - brz,a,pt %o1, bzero_private - mov %o2, %o1 -#ifndef REGS_64BIT - srl %o2, 0, %o2 -#endif -#endif - mov %o0, %o4 - cmp %o2, 7 - bleu,pn %xcc, 17f - andcc %o0, 3, %g5 - be,pt %xcc, 4f - and %o1, 0xff, %o1 - cmp %g5, 3 - be,pn %xcc, 2f - stb %o1, [%o0 + 0x00] - cmp %g5, 2 - be,pt %xcc, 2f - stb %o1, [%o0 + 0x01] - stb %o1, [%o0 + 0x02] -2: sub %g5, 4, %g5 - sub %o0, %g5, %o0 - add %o2, %g5, %o2 -4: sllx %o1, 8, %g1 - andcc %o0, 4, %g0 - or %o1, %g1, %o1 - sllx %o1, 16, %g1 - or %o1, %g1, %o1 - be,pt %xcc, 2f -#ifdef REGS_64BIT - sllx %o1, 32, %g1 -#else - cmp %o2, 128 -#endif - stw %o1, [%o0] - sub %o2, 4, %o2 - add %o0, 4, %o0 -2: -#ifdef REGS_64BIT - cmp %o2, 128 - or %o1, %g1, %o1 -#endif - blu,pn %xcc, 9f - andcc %o0, 0x38, %g5 - be,pn %icc, 6f - mov 64, %o5 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o5, %g5, %o5 -#ifdef REGS_64BIT - stx %o1, [%o0] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] -#endif - add %o0, 8, %o0 -1: andcc %o5, 16, %g0 - be,pn %icc, 1f - sub %o2, %o5, %o2 -#ifdef REGS_64BIT - stx %o1, [%o0] - stx %o1, [%o0 + 8] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] - stw %o1, [%o0 + 8] - stw %o1, [%o0 + 12] -#endif - add %o0, 16, %o0 -1: andcc %o5, 32, %g0 - be,pn %icc, 7f - andncc %o2, 0x3f, %o3 -#ifdef REGS_64BIT - stx %o1, [%o0] - stx %o1, [%o0 + 8] - stx %o1, [%o0 + 16] - stx %o1, [%o0 + 24] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] - stw %o1, [%o0 + 8] - stw %o1, [%o0 + 12] - stw %o1, [%o0 + 16] - stw %o1, [%o0 + 20] - stw %o1, [%o0 + 24] - stw %o1, [%o0 + 28] -#endif - add %o0, 32, %o0 -7: be,pn %xcc, 9f - nop -#ifdef __KERNEL__ - VISEntryHalf -#endif - ldd [%o0 - 8], %f0 -18: rd %asi, %g2 - wr %g0, ASI_BLK_P, %asi - membar #StoreStore | #LoadStore - andcc %o3, 0xc0, %g5 - and %o2, 0x3f, %o2 - fmovd %f0, %f2 - fmovd %f0, %f4 - andn %o3, 0xff, %o3 - fmovd %f0, %f6 - cmp %g5, 64 - fmovd %f0, %f8 - fmovd %f0, %f10 - fmovd %f0, %f12 - brz,pn %g5, 10f - fmovd %f0, %f14 - be,pn %icc, 2f - stda %f0, [%o0 + 0x00] %asi - cmp %g5, 128 - be,pn %icc, 2f - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi -2: brz,pn %o3, 12f - add %o0, %g5, %o0 -10: stda %f0, [%o0 + 0x00] %asi - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi - stda %f0, [%o0 + 0xc0] %asi -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: -#ifdef __KERNEL__ - wr %g2, %g0, %asi - VISExitHalf -#else -#ifndef REGS_64BIT - wr %g0, FPRS_FEF, %fprs -#endif -#endif - membar #StoreLoad | #StoreStore -9: andcc %o2, 0x78, %g5 - be,pn %xcc, 13f - andcc %o2, 7, %o2 -#ifdef __KERNEL__ -14: srl %g5, 1, %o3 - sethi %hi(13f), %g3 - sub %g3, %o3, %g3 - jmpl %g3 + %lo(13f), %g0 - add %o0, %g5, %o0 -#else -14: rd %pc, %g3 -#ifdef REGS_64BIT - srl %g5, 1, %o3 - sub %g3, %o3, %g3 -#else - sub %g3, %g5, %g3 -#endif - jmpl %g3 + (13f - 14b), %g0 - add %o0, %g5, %o0 -#endif -12: SET_BLOCKS(%o0, 0x68, %o1) - SET_BLOCKS(%o0, 0x48, %o1) - SET_BLOCKS(%o0, 0x28, %o1) - SET_BLOCKS(%o0, 0x08, %o1) -13: be,pn %xcc, 8f - andcc %o2, 4, %g0 - be,pn %xcc, 1f - andcc %o2, 2, %g0 - stw %o1, [%o0] - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o2, 1, %g0 - sth %o1, [%o0] - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - stb %o1, [%o0] -8: retl - mov %o4, %o0 -17: brz,pn %o2, 0f -8: add %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 8b - stb %o1, [%o0 - 1] -0: retl - mov %o4, %o0 -6: -#ifdef REGS_64BIT - stx %o1, [%o0] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] -#endif - andncc %o2, 0x3f, %o3 - be,pn 
%xcc, 9b - nop -#ifdef __KERNEL__ - VISEntryHalf -#endif - ba,pt %xcc, 18b - ldd [%o0], %f0 diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index 41be4131f8008..e528b8d1a3e69 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S @@ -29,10 +29,10 @@ .globl atomic_add .type atomic_add,#function atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ -1: lduw [%o1], %g5 - add %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b nop retl @@ -42,10 +42,10 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ .globl atomic_sub .type atomic_sub,#function atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ -1: lduw [%o1], %g5 - sub %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b nop retl @@ -56,10 +56,10 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: lduw [%o1], %g5 - add %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b add %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -71,10 +71,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ .type atomic_sub_ret,#function atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: lduw [%o1], %g5 - sub %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b sub %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -85,10 +85,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ .globl atomic64_add .type atomic64_add,#function atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ -1: ldx [%o1], %g5 - add %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + add %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b nop retl @@ -98,10 +98,10 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ .globl atomic64_sub .type atomic64_sub,#function atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ -1: ldx [%o1], %g5 - sub %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + sub %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b nop retl @@ -112,10 +112,10 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ .type atomic64_add_ret,#function atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: ldx [%o1], %g5 - add %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + add %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b add %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -127,10 +127,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ .type atomic64_sub_ret,#function atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: ldx [%o1], %g5 - sub %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + sub %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b sub %g7, %o0, %g7 ATOMIC_POST_BARRIER diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index fd20171ecfd10..886dcd2b376a0 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S @@ -26,17 +26,17 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ BITOP_PRE_BARRIER srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, 
%g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - or %g7, %g5, %g1 + or %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b - and %g7, %g5, %g2 + and %g7, %o2, %g2 BITOP_POST_BARRIER clr %o0 retl @@ -48,17 +48,17 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ test_and_clear_bit: /* %o0=nr, %o1=addr */ BITOP_PRE_BARRIER srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, %g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - andn %g7, %g5, %g1 + andn %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b - and %g7, %g5, %g2 + and %g7, %o2, %g2 BITOP_POST_BARRIER clr %o0 retl @@ -70,17 +70,17 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ test_and_change_bit: /* %o0=nr, %o1=addr */ BITOP_PRE_BARRIER srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, %g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - xor %g7, %g5, %g1 + xor %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b - and %g7, %g5, %g2 + and %g7, %o2, %g2 BITOP_POST_BARRIER clr %o0 retl @@ -91,13 +91,13 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ .type set_bit,#function set_bit: /* %o0=nr, %o1=addr */ srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, %g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - or %g7, %g5, %g1 + or %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b @@ -110,13 +110,13 @@ set_bit: /* %o0=nr, %o1=addr */ .type clear_bit,#function clear_bit: /* %o0=nr, %o1=addr */ srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, %g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - andn %g7, %g5, %g1 + andn %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b @@ -129,13 +129,13 @@ clear_bit: /* %o0=nr, %o1=addr */ .type change_bit,#function change_bit: /* %o0=nr, %o1=addr */ srlx %o0, 6, %g1 - mov 1, %g5 + mov 1, %o2 sllx %g1, 3, %g3 and %o0, 63, %g2 - sllx %g5, %g2, %g5 + sllx %o2, %g2, %o2 add %o1, %g3, %o1 1: ldx [%o1], %g7 - xor %g7, %g5, %g1 + xor %g7, %o2, %g1 casx [%o1], %g7, %g1 cmp %g7, %g1 bne,pn %xcc, 1b diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S new file mode 100644 index 0000000000000..21a933ffb7c29 --- /dev/null +++ b/arch/sparc64/lib/bzero.S @@ -0,0 +1,158 @@ +/* bzero.S: Simple prefetching memset, bzero, and clear_user + * implementations. + * + * Copyright (C) 2005 David S. 
Miller <davem@davemloft.net> + */ + + .text + + .globl __memset + .type __memset, #function +__memset: /* %o0=buf, %o1=pat, %o2=len */ + + .globl memset + .type memset, #function +memset: /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %xcc, 1f + or %g1, %o2, %o2 + + .globl __bzero + .type __bzero, #function +__bzero: /* %o0=buf, %o1=len */ + clr %o2 +1: mov %o0, %o3 + brz,pn %o1, __bzero_done + cmp %o1, 16 + bl,pn %icc, __bzero_tiny + prefetch [%o0 + 0x000], #n_writes + andcc %o0, 0x3, %g0 + be,pt %icc, 2f +1: stb %o2, [%o0 + 0x00] + add %o0, 1, %o0 + andcc %o0, 0x3, %g0 + bne,pn %icc, 1b + sub %o1, 1, %o1 +2: andcc %o0, 0x7, %g0 + be,pt %icc, 3f + stw %o2, [%o0 + 0x00] + sub %o1, 4, %o1 + add %o0, 4, %o0 +3: and %o1, 0x38, %g1 + cmp %o1, 0x40 + andn %o1, 0x3f, %o4 + bl,pn %icc, 5f + and %o1, 0x7, %o1 + prefetch [%o0 + 0x040], #n_writes + prefetch [%o0 + 0x080], #n_writes + prefetch [%o0 + 0x0c0], #n_writes + prefetch [%o0 + 0x100], #n_writes + prefetch [%o0 + 0x140], #n_writes +4: prefetch [%o0 + 0x180], #n_writes + stx %o2, [%o0 + 0x00] + stx %o2, [%o0 + 0x08] + stx %o2, [%o0 + 0x10] + stx %o2, [%o0 + 0x18] + stx %o2, [%o0 + 0x20] + stx %o2, [%o0 + 0x28] + stx %o2, [%o0 + 0x30] + stx %o2, [%o0 + 0x38] + subcc %o4, 0x40, %o4 + bne,pt %icc, 4b + add %o0, 0x40, %o0 + brz,pn %g1, 6f + nop +5: stx %o2, [%o0 + 0x00] + subcc %g1, 8, %g1 + bne,pt %icc, 5b + add %o0, 0x8, %o0 +6: brz,pt %o1, __bzero_done + nop +__bzero_tiny: +1: stb %o2, [%o0 + 0x00] + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 +__bzero_done: + retl + mov %o3, %o0 + .size __bzero, .-__bzero + .size __memset, .-__memset + .size memset, .-memset + +#define EX_ST(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov %o1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .globl __bzero_noasi + .type __bzero_noasi, #function +__bzero_noasi: /* %o0=buf, %o1=len */ + brz,pn %o1, __bzero_noasi_done + cmp %o1, 16 + bl,pn %icc, __bzero_noasi_tiny + EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) + andcc %o0, 0x3, %g0 + be,pt %icc, 2f +1: EX_ST(stba %g0, [%o0 + 0x00] %asi) + add %o0, 1, %o0 + andcc %o0, 0x3, %g0 + bne,pn %icc, 1b + sub %o1, 1, %o1 +2: andcc %o0, 0x7, %g0 + be,pt %icc, 3f + EX_ST(stwa %g0, [%o0 + 0x00] %asi) + sub %o1, 4, %o1 + add %o0, 4, %o0 +3: and %o1, 0x38, %g1 + cmp %o1, 0x40 + andn %o1, 0x3f, %o4 + bl,pn %icc, 5f + and %o1, 0x7, %o1 + EX_ST(prefetcha [%o0 + 0x040] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x080] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x0c0] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x100] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x140] %asi, #n_writes) +4: EX_ST(prefetcha [%o0 + 0x180] %asi, #n_writes) + EX_ST(stxa %g0, [%o0 + 0x00] %asi) + EX_ST(stxa %g0, [%o0 + 0x08] %asi) + EX_ST(stxa %g0, [%o0 + 0x10] %asi) + EX_ST(stxa %g0, [%o0 + 0x18] %asi) + EX_ST(stxa %g0, [%o0 + 0x20] %asi) + EX_ST(stxa %g0, [%o0 + 0x28] %asi) + EX_ST(stxa %g0, [%o0 + 0x30] %asi) + EX_ST(stxa %g0, [%o0 + 0x38] %asi) + subcc %o4, 0x40, %o4 + bne,pt %icc, 4b + add %o0, 0x40, %o0 + brz,pn %g1, 6f + nop +5: EX_ST(stxa %g0, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %icc, 5b + add %o0, 0x8, %o0 +6: brz,pt %o1, __bzero_noasi_done + nop +__bzero_noasi_tiny: +1: EX_ST(stba %g0, [%o0 + 0x00] %asi) + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 +__bzero_noasi_done: + retl + clr %o0 + .size __bzero_noasi, .-__bzero_noasi 
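The pattern setup at the top of the new memset above is worth spelling out: before falling into the store loop it shares with __bzero, it widens the low byte of %o1 into a full 64-bit store pattern. A minimal standalone C sketch of that widening step (illustrative only, not part of the patch; the function name is invented):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the and/sllx/or sequence at the top of memset in bzero.S,
 * which leaves the replicated pattern in %o2 for the stx store loop.
 */
static uint64_t memset_pattern64(uint64_t pat)
{
	pat &= 0xff;		/* and  %o1, 0xff, %o3 */
	pat |= pat << 8;	/* sllx %o3,  8, %g1; or %g1, %o3, %o2 */
	pat |= pat << 16;	/* sllx %o2, 16, %g1; or %g1, %o2, %o2 */
	pat |= pat << 32;	/* sllx %o2, 32, %g1; or %g1, %o2, %o2 */
	return pat;
}

int main(void)
{
	/* 0xab widens to 0xabababababababab */
	printf("%#llx\n", (unsigned long long)memset_pattern64(0xab));
	return 0;
}

__bzero itself skips this step and simply clears %o2, which is why memset can branch straight into the common code at label 1.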
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index 46e5ebfb4b7ce..c421e0c653253 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -138,15 +138,15 @@ wlock_again: } /* Try once to increment the counter. */ __asm__ __volatile__( -" ldx [%0], %%g5\n" -" brlz,a,pn %%g5, 2f\n" +" ldx [%0], %%g1\n" +" brlz,a,pn %%g1, 2f\n" " mov 1, %0\n" -" add %%g5, 1, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" sub %%g5, %%g7, %0\n" +" add %%g1, 1, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" sub %%g1, %%g7, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g5", "g7", "memory"); + : "g1", "g7", "memory"); membar("#StoreLoad | #StoreStore"); if (val) goto wlock_again; @@ -173,14 +173,14 @@ runlock_again: /* Spin trying to decrement the counter using casx. */ __asm__ __volatile__( " membar #StoreLoad | #LoadLoad\n" -" ldx [%0], %%g5\n" -" sub %%g5, 1, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" sub %%g1, 1, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" : "=r" (val) : "0" (&(rw->lock)) - : "g5", "g7", "memory"); + : "g1", "g7", "memory"); if (val) { if (!--stuck) { if (shown++ <= 2) @@ -216,17 +216,17 @@ wlock_again: __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" brlz,pn %%g5, 1f\n" -" or %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" brlz,pn %%g1, 1f\n" +" or %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" " ba,pt %%xcc, 2f\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" "1: mov 1, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { /* We couldn't get the write bit. 
*/ if (!--stuck) { @@ -248,15 +248,15 @@ wlock_again: __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -"1: ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%xcc, 1b\n" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (&(rw->lock)) - : "g3", "g5", "g7", "cc", "memory"); + : "g3", "g1", "g7", "cc", "memory"); while(rw->lock != 0) { if (!--stuck) { if (shown++ <= 2) @@ -294,14 +294,14 @@ wlock_again: " membar #StoreLoad | #LoadLoad\n" " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { if (!--stuck) { if (shown++ <= 2) @@ -323,17 +323,17 @@ int _do_write_trylock (rwlock_t *rw, char *str) __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" brlz,pn %%g5, 1f\n" -" or %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" brlz,pn %%g1, 1f\n" +" or %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" " ba,pt %%xcc, 2f\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" "1: mov 1, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { put_cpu(); @@ -347,15 +347,15 @@ int _do_write_trylock (rwlock_t *rw, char *str) __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -"1: ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%xcc, 1b\n" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (&(rw->lock)) - : "g3", "g5", "g7", "cc", "memory"); + : "g3", "g1", "g7", "cc", "memory"); put_cpu(); diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index e86906744cf6f..7e6fdaebedbab 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S @@ -27,12 +27,12 @@ .globl _atomic_dec_and_lock _atomic_dec_and_lock: /* %o0 = counter, %o1 = lock */ -loop1: lduw [%o0], %g5 - subcc %g5, 1, %g7 +loop1: lduw [%o0], %g2 + subcc %g2, 1, %g7 be,pn %icc, start_to_zero nop -nzero: cas [%o0], %g5, %g7 - cmp %g5, %g7 +nzero: cas [%o0], %g2, %g7 + cmp %g2, %g7 bne,pn %icc, loop1 mov 0, %g1 @@ -50,13 +50,13 @@ to_zero: ldstub [%o1], %g3 brnz,pn %g3, spin_on_lock membar #StoreLoad | #StoreStore -loop2: cas [%o0], %g5, %g7 /* ASSERT(g7 == 0) */ - cmp %g5, %g7 +loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ + cmp %g2, %g7 be,pt %icc, out mov 1, %g1 - lduw [%o0], %g5 - subcc %g5, 1, %g7 + lduw [%o0], %g2 + subcc %g2, 1, %g7 be,pn %icc, loop2 nop membar #StoreStore | #LoadStore diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S index 4e8c7928c49f1..2ef2e268bdcfd 100644 --- a/arch/sparc64/lib/mcount.S +++ b/arch/sparc64/lib/mcount.S @@ -38,22 +38,22 @@ _mcount: * Check whether %sp is dangerously low. */ ldub [%g6 + TI_FPDEPTH], %g1 - srl %g1, 1, %g5 - add %g5, 1, %g5 - sllx %g5, 8, %g5 ! each fpregs frame is 256b - add %g5, 192, %g5 - add %g6, %g5, %g5 ! where does task_struct+frame end? 
- sub %g5, STACK_BIAS, %g5 - cmp %sp, %g5 + srl %g1, 1, %g3 + add %g3, 1, %g3 + sllx %g3, 8, %g3 ! each fpregs frame is 256b + add %g3, 192, %g3 + add %g6, %g3, %g3 ! where does task_struct+frame end? + sub %g3, STACK_BIAS, %g3 + cmp %sp, %g3 bg,pt %xcc, 1f - sethi %hi(panicstring), %g5 + sethi %hi(panicstring), %g3 sethi %hi(ovstack), %g7 ! can't move to panic stack fast enough or %g7, %lo(ovstack), %g7 add %g7, OVSTACKSIZE, %g7 sub %g7, STACK_BIAS, %g7 mov %g7, %sp call prom_printf - or %g5, %lo(panicstring), %o0 + or %g3, %lo(panicstring), %o0 call prom_halt nop #endif diff --git a/arch/sparc64/lib/memcmp.S b/arch/sparc64/lib/memcmp.S index d34dc3d874dae..c90ad96c51b9c 100644 --- a/arch/sparc64/lib/memcmp.S +++ b/arch/sparc64/lib/memcmp.S @@ -13,12 +13,12 @@ memcmp: cmp %o2, 0 ! IEU1 Group loop: be,pn %icc, ret_0 ! CTI nop ! IEU0 - ldub [%o0], %g5 ! LSU Group + ldub [%o0], %g7 ! LSU Group ldub [%o1], %g3 ! LSU Group sub %o2, 1, %o2 ! IEU0 add %o0, 1, %o0 ! IEU1 add %o1, 1, %o1 ! IEU0 Group - subcc %g5, %g3, %g3 ! IEU1 Group + subcc %g7, %g3, %g3 ! IEU1 Group be,pt %icc, loop ! CTI cmp %o2, 0 ! IEU1 Group diff --git a/arch/sparc64/lib/memmove.S b/arch/sparc64/lib/memmove.S index 1c1ebbbdf830e..97395802c23c4 100644 --- a/arch/sparc64/lib/memmove.S +++ b/arch/sparc64/lib/memmove.S @@ -12,17 +12,17 @@ memmove: /* o0=dst o1=src o2=len */ mov %o0, %g1 cmp %o0, %o1 bleu,pt %xcc, memcpy - add %o1, %o2, %g5 - cmp %g5, %o0 + add %o1, %o2, %g7 + cmp %g7, %o0 bleu,pt %xcc, memcpy add %o0, %o2, %o5 - sub %g5, 1, %o1 + sub %g7, 1, %o1 sub %o5, 1, %o0 -1: ldub [%o1], %g5 +1: ldub [%o1], %g7 subcc %o2, 1, %o2 sub %o1, 1, %o1 - stb %g5, [%o0] + stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S index a34c6b9d21e85..5e72d49114179 100644 --- a/arch/sparc64/lib/memscan.S +++ b/arch/sparc64/lib/memscan.S @@ -52,43 +52,43 @@ check_bytes: andcc %o5, 0xff, %g0 add %o0, -5, %g2 ba,pt %xcc, 3f - srlx %o5, 32, %g5 + srlx %o5, 32, %g7 -2: srlx %o5, 8, %g5 +2: srlx %o5, 8, %g7 be,pn %icc, 1f add %o0, -8, %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 + andcc %g7, 0xff, %g0 - srlx %g5, 8, %g5 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 andcc %g3, %o3, %g0 be,a,pn %icc, 2f mov %o0, %g2 -3: andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 +3: andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S new file mode 100644 index 0000000000000..174ff7b9164c5 --- /dev/null +++ b/arch/sparc64/lib/rwsem.S @@ -0,0 +1,165 @@ +/* rwsem.S: RW semaphore assembler. + * + * Written by David S. Miller (davem@redhat.com), 2001.
+ * Derived from asm-i386/rwsem.h + */ + +#include <asm/rwsem-const.h> + + .section .sched.text + + .globl __down_read +__down_read: +1: lduw [%o0], %g1 + add %g1, 1, %g7 + cas [%o0], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 1b + add %g7, 1, %g7 + cmp %g7, 0 + bl,pn %icc, 3f + membar #StoreLoad | #StoreStore +2: + retl + nop +3: + save %sp, -192, %sp + call rwsem_down_read_failed + mov %i0, %o0 + ret + restore + .size __down_read, .-__down_read + + .globl __down_read_trylock +__down_read_trylock: +1: lduw [%o0], %g1 + add %g1, 1, %g7 + cmp %g7, 0 + bl,pn %icc, 2f + mov 0, %o1 + cas [%o0], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 1b + mov 1, %o1 + membar #StoreLoad | #StoreStore +2: retl + mov %o1, %o0 + .size __down_read_trylock, .-__down_read_trylock + + .globl __down_write +__down_write: + sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 + or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 +1: + lduw [%o0], %g3 + add %g3, %g1, %g7 + cas [%o0], %g3, %g7 + cmp %g3, %g7 + bne,pn %icc, 1b + cmp %g7, 0 + bne,pn %icc, 3f + membar #StoreLoad | #StoreStore +2: retl + nop +3: + save %sp, -192, %sp + call rwsem_down_write_failed + mov %i0, %o0 + ret + restore + .size __down_write, .-__down_write + + .globl __down_write_trylock +__down_write_trylock: + sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 + or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 +1: + lduw [%o0], %g3 + cmp %g3, 0 + bne,pn %icc, 2f + mov 0, %o1 + add %g3, %g1, %g7 + cas [%o0], %g3, %g7 + cmp %g3, %g7 + bne,pn %icc, 1b + mov 1, %o1 + membar #StoreLoad | #StoreStore +2: retl + mov %o1, %o0 + .size __down_write_trylock, .-__down_write_trylock + + .globl __up_read +__up_read: +1: + lduw [%o0], %g1 + sub %g1, 1, %g7 + cas [%o0], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 1b + cmp %g7, 0 + bl,pn %icc, 3f + membar #StoreLoad | #StoreStore +2: retl + nop +3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 + sub %g7, 1, %g7 + or %g1, %lo(RWSEM_ACTIVE_MASK), %g1 + andcc %g7, %g1, %g0 + bne,pn %icc, 2b + nop + save %sp, -192, %sp + call rwsem_wake + mov %i0, %o0 + ret + restore + .size __up_read, .-__up_read + + .globl __up_write +__up_write: + sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 + or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 +1: + lduw [%o0], %g3 + sub %g3, %g1, %g7 + cas [%o0], %g3, %g7 + cmp %g3, %g7 + bne,pn %icc, 1b + sub %g7, %g1, %g7 + cmp %g7, 0 + bl,pn %icc, 3f + membar #StoreLoad | #StoreStore +2: + retl + nop +3: + save %sp, -192, %sp + call rwsem_wake + mov %i0, %o0 + ret + restore + .size __up_write, .-__up_write + + .globl __downgrade_write +__downgrade_write: + sethi %hi(RWSEM_WAITING_BIAS), %g1 + or %g1, %lo(RWSEM_WAITING_BIAS), %g1 +1: + lduw [%o0], %g3 + sub %g3, %g1, %g7 + cas [%o0], %g3, %g7 + cmp %g3, %g7 + bne,pn %icc, 1b + sub %g7, %g1, %g7 + cmp %g7, 0 + bl,pn %icc, 3f + membar #StoreLoad | #StoreStore +2: + retl + nop +3: + save %sp, -192, %sp + call rwsem_downgrade_wake + mov %i0, %o0 + ret + restore + .size __downgrade_write, .-__downgrade_write diff --git a/arch/sparc64/lib/rwsem.c b/arch/sparc64/lib/rwsem.c deleted file mode 100644 index e19968dbc2d15..0000000000000 --- a/arch/sparc64/lib/rwsem.c +++ /dev/null @@ -1,239 +0,0 @@ -/* rwsem.c: Don't inline expand these suckers all over the place. - * - * Written by David S. Miller (davem@redhat.com), 2001. 
- * Derived from asm-i386/rwsem.h - */ - -#include <linux/kernel.h> -#include <linux/rwsem.h> -#include <linux/init.h> -#include <linux/module.h> - -extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *)); -extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *)); - -void __sched __down_read(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "! beginning __down_read\n" - "1:\tlduw [%0], %%g5\n\t" - "add %%g5, 1, %%g7\n\t" - "cas [%0], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " add %%g7, 1, %%g7\n\t" - "cmp %%g7, 0\n\t" - "bl,pn %%icc, 3f\n\t" - " membar #StoreLoad | #StoreStore\n" - "2:\n\t" - ".subsection 2\n" - "3:\tmov %0, %%g5\n\t" - "save %%sp, -160, %%sp\n\t" - "mov %%g1, %%l1\n\t" - "mov %%g2, %%l2\n\t" - "mov %%g3, %%l3\n\t" - "call %1\n\t" - " mov %%g5, %%o0\n\t" - "mov %%l1, %%g1\n\t" - "mov %%l2, %%g2\n\t" - "ba,pt %%xcc, 2b\n\t" - " restore %%l3, %%g0, %%g3\n\t" - ".previous\n\t" - "! ending __down_read" - : : "r" (sem), "i" (rwsem_down_read_failed) - : "g5", "g7", "memory", "cc"); -} -EXPORT_SYMBOL(__down_read); - -int __down_read_trylock(struct rw_semaphore *sem) -{ - int result; - - __asm__ __volatile__( - "! beginning __down_read_trylock\n" - "1:\tlduw [%1], %%g5\n\t" - "add %%g5, 1, %%g7\n\t" - "cmp %%g7, 0\n\t" - "bl,pn %%icc, 2f\n\t" - " mov 0, %0\n\t" - "cas [%1], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " mov 1, %0\n\t" - "membar #StoreLoad | #StoreStore\n" - "2:\n\t" - "! ending __down_read_trylock" - : "=&r" (result) - : "r" (sem) - : "g5", "g7", "memory", "cc"); - - return result; -} -EXPORT_SYMBOL(__down_read_trylock); - -void __sched __down_write(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "! beginning __down_write\n\t" - "sethi %%hi(%2), %%g1\n\t" - "or %%g1, %%lo(%2), %%g1\n" - "1:\tlduw [%0], %%g5\n\t" - "add %%g5, %%g1, %%g7\n\t" - "cas [%0], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " cmp %%g7, 0\n\t" - "bne,pn %%icc, 3f\n\t" - " membar #StoreLoad | #StoreStore\n" - "2:\n\t" - ".subsection 2\n" - "3:\tmov %0, %%g5\n\t" - "save %%sp, -160, %%sp\n\t" - "mov %%g2, %%l2\n\t" - "mov %%g3, %%l3\n\t" - "call %1\n\t" - " mov %%g5, %%o0\n\t" - "mov %%l2, %%g2\n\t" - "ba,pt %%xcc, 2b\n\t" - " restore %%l3, %%g0, %%g3\n\t" - ".previous\n\t" - "! ending __down_write" - : : "r" (sem), "i" (rwsem_down_write_failed), - "i" (RWSEM_ACTIVE_WRITE_BIAS) - : "g1", "g5", "g7", "memory", "cc"); -} -EXPORT_SYMBOL(__down_write); - -int __down_write_trylock(struct rw_semaphore *sem) -{ - int result; - - __asm__ __volatile__( - "! beginning __down_write_trylock\n\t" - "sethi %%hi(%2), %%g1\n\t" - "or %%g1, %%lo(%2), %%g1\n" - "1:\tlduw [%1], %%g5\n\t" - "cmp %%g5, 0\n\t" - "bne,pn %%icc, 2f\n\t" - " mov 0, %0\n\t" - "add %%g5, %%g1, %%g7\n\t" - "cas [%1], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " mov 1, %0\n\t" - "membar #StoreLoad | #StoreStore\n" - "2:\n\t" - "! ending __down_write_trylock" - : "=&r" (result) - : "r" (sem), "i" (RWSEM_ACTIVE_WRITE_BIAS) - : "g1", "g5", "g7", "memory", "cc"); - - return result; -} -EXPORT_SYMBOL(__down_write_trylock); - -void __up_read(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "! 
beginning __up_read\n\t" - "1:\tlduw [%0], %%g5\n\t" - "sub %%g5, 1, %%g7\n\t" - "cas [%0], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " cmp %%g7, 0\n\t" - "bl,pn %%icc, 3f\n\t" - " membar #StoreLoad | #StoreStore\n" - "2:\n\t" - ".subsection 2\n" - "3:\tsethi %%hi(%2), %%g1\n\t" - "sub %%g7, 1, %%g7\n\t" - "or %%g1, %%lo(%2), %%g1\n\t" - "andcc %%g7, %%g1, %%g0\n\t" - "bne,pn %%icc, 2b\n\t" - " mov %0, %%g5\n\t" - "save %%sp, -160, %%sp\n\t" - "mov %%g2, %%l2\n\t" - "mov %%g3, %%l3\n\t" - "call %1\n\t" - " mov %%g5, %%o0\n\t" - "mov %%l2, %%g2\n\t" - "ba,pt %%xcc, 2b\n\t" - " restore %%l3, %%g0, %%g3\n\t" - ".previous\n\t" - "! ending __up_read" - : : "r" (sem), "i" (rwsem_wake), - "i" (RWSEM_ACTIVE_MASK) - : "g1", "g5", "g7", "memory", "cc"); -} -EXPORT_SYMBOL(__up_read); - -void __up_write(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "! beginning __up_write\n\t" - "sethi %%hi(%2), %%g1\n\t" - "or %%g1, %%lo(%2), %%g1\n" - "1:\tlduw [%0], %%g5\n\t" - "sub %%g5, %%g1, %%g7\n\t" - "cas [%0], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " sub %%g7, %%g1, %%g7\n\t" - "cmp %%g7, 0\n\t" - "bl,pn %%icc, 3f\n\t" - " membar #StoreLoad | #StoreStore\n" - "2:\n\t" - ".subsection 2\n" - "3:\tmov %0, %%g5\n\t" - "save %%sp, -160, %%sp\n\t" - "mov %%g2, %%l2\n\t" - "mov %%g3, %%l3\n\t" - "call %1\n\t" - " mov %%g5, %%o0\n\t" - "mov %%l2, %%g2\n\t" - "ba,pt %%xcc, 2b\n\t" - " restore %%l3, %%g0, %%g3\n\t" - ".previous\n\t" - "! ending __up_write" - : : "r" (sem), "i" (rwsem_wake), - "i" (RWSEM_ACTIVE_WRITE_BIAS) - : "g1", "g5", "g7", "memory", "cc"); -} -EXPORT_SYMBOL(__up_write); - -void __downgrade_write(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "! beginning __downgrade_write\n\t" - "sethi %%hi(%2), %%g1\n\t" - "or %%g1, %%lo(%2), %%g1\n" - "1:\tlduw [%0], %%g5\n\t" - "sub %%g5, %%g1, %%g7\n\t" - "cas [%0], %%g5, %%g7\n\t" - "cmp %%g5, %%g7\n\t" - "bne,pn %%icc, 1b\n\t" - " sub %%g7, %%g1, %%g7\n\t" - "cmp %%g7, 0\n\t" - "bl,pn %%icc, 3f\n\t" - " membar #StoreLoad | #StoreStore\n" - "2:\n\t" - ".subsection 2\n" - "3:\tmov %0, %%g5\n\t" - "save %%sp, -160, %%sp\n\t" - "mov %%g2, %%l2\n\t" - "mov %%g3, %%l3\n\t" - "call %1\n\t" - " mov %%g5, %%o0\n\t" - "mov %%l2, %%g2\n\t" - "ba,pt %%xcc, 2b\n\t" - " restore %%l3, %%g0, %%g3\n\t" - ".previous\n\t" - "! ending __up_write" - : : "r" (sem), "i" (rwsem_downgrade_wake), - "i" (RWSEM_WAITING_BIAS) - : "g1", "g5", "g7", "memory", "cc"); -} -EXPORT_SYMBOL(__downgrade_write); diff --git a/arch/sparc64/lib/strlen.S b/arch/sparc64/lib/strlen.S index 066ec1ed7d0dd..e9ba1920d818e 100644 --- a/arch/sparc64/lib/strlen.S +++ b/arch/sparc64/lib/strlen.S @@ -48,16 +48,16 @@ strlen: add %o0, 4, %o0 /* Check every byte. */ - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 24, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 16, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 8, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 andcc %o5, 0xff, %g0 diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S index 4af69a0adfbcc..9ed54ba14fc63 100644 --- a/arch/sparc64/lib/strlen_user.S +++ b/arch/sparc64/lib/strlen_user.S @@ -54,16 +54,16 @@ __strnlen_user: ba,a,pt %xcc, 1f /* Check every byte. 
*/ -82: srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 +82: srl %o5, 24, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o0, -3, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 16, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 8, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 andcc %o5, 0xff, %g0 diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S index f748fd6bbc389..4cd5d2be1ae1f 100644 --- a/arch/sparc64/lib/xor.S +++ b/arch/sparc64/lib/xor.S @@ -248,7 +248,7 @@ xor_vis_4: .globl xor_vis_5 .type xor_vis_5,#function xor_vis_5: - mov %o5, %g5 + save %sp, -192, %sp rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 be,pt %icc, 0f @@ -256,61 +256,60 @@ xor_vis_5: jmpl %g1 + %lo(VISenter), %g7 add %g7, 8, %g7 0: wr %g0, FPRS_FEF, %fprs - mov %g5, %o5 rd %asi, %g1 wr %g0, ASI_BLK_P, %asi membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 64, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 + sub %i0, 64, %i0 + ldda [%i1] %asi, %f0 + ldda [%i2] %asi, %f16 -5: ldda [%o3] %asi, %f32 +5: ldda [%i3] %asi, %f32 fxor %f0, %f16, %f48 fxor %f2, %f18, %f50 - add %o1, 64, %o1 + add %i1, 64, %i1 fxor %f4, %f20, %f52 fxor %f6, %f22, %f54 - add %o2, 64, %o2 + add %i2, 64, %i2 fxor %f8, %f24, %f56 fxor %f10, %f26, %f58 fxor %f12, %f28, %f60 fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 + ldda [%i4] %asi, %f16 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 fxor %f54, %f38, %f54 - add %o3, 64, %o3 + add %i3, 64, %i3 fxor %f56, %f40, %f56 fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 + ldda [%i5] %asi, %f32 fxor %f48, %f16, %f48 fxor %f50, %f18, %f50 - add %o4, 64, %o4 + add %i4, 64, %i4 fxor %f52, %f20, %f52 fxor %f54, %f22, %f54 - add %o5, 64, %o5 + add %i5, 64, %i5 fxor %f56, %f24, %f56 fxor %f58, %f26, %f58 fxor %f60, %f28, %f60 fxor %f62, %f30, %f62 - ldda [%o1] %asi, %f0 + ldda [%i1] %asi, %f0 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 fxor %f54, %f38, %f54 fxor %f56, %f40, %f56 fxor %f58, %f42, %f58 - subcc %o0, 64, %o0 + subcc %i0, 64, %i0 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - stda %f48, [%o1 - 64] %asi + stda %f48, [%i1 - 64] %asi bne,pt %xcc, 5b - ldda [%o2] %asi, %f16 + ldda [%i2] %asi, %f16 - ldda [%o3] %asi, %f32 + ldda [%i3] %asi, %f32 fxor %f0, %f16, %f48 fxor %f2, %f18, %f50 fxor %f4, %f20, %f52 @@ -319,7 +318,7 @@ xor_vis_5: fxor %f10, %f26, %f58 fxor %f12, %f28, %f60 fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 + ldda [%i4] %asi, %f16 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 @@ -328,7 +327,7 @@ xor_vis_5: fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 + ldda [%i5] %asi, %f32 fxor %f48, %f16, %f48 fxor %f50, %f18, %f50 fxor %f52, %f20, %f52 @@ -346,9 +345,10 @@ xor_vis_5: fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - stda %f48, [%o1] %asi + stda %f48, [%i1] %asi membar #Sync|#StoreStore|#StoreLoad wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs + wr %g0, 0, %fprs + ret + restore .size xor_vis_5, .-xor_vis_5 diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 45edb9459bcdf..3ffee7b51aed5 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -144,7 +144,9 @@ static void unhandled_fault(unsigned long address, struct task_struct *tsk, "at virtual address %016lx\n", (unsigned long)address); } printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n", - (tsk->mm ? 
tsk->mm->context : tsk->active_mm->context)); + (tsk->mm ? + CTX_HWBITS(tsk->mm->context) : + CTX_HWBITS(tsk->active_mm->context))); printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n", (tsk->mm ? (unsigned long) tsk->mm->pgd : (unsigned long) tsk->active_mm->pgd)); diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index ffa207795f1df..5a1f831b2de1b 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -20,6 +20,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/mmu_context.h> static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { @@ -217,12 +218,50 @@ void unmap_hugepage_range(struct vm_area_struct *vma, flush_tlb_range(vma, start, end); } +static void context_reload(void *__data) +{ + struct mm_struct *mm = __data; + + if (mm == current->mm) + load_secondary_context(mm); +} + int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) { struct mm_struct *mm = current->mm; unsigned long addr; int ret = 0; + /* On UltraSPARC-III+ and later, configure the second half of + * the Data-TLB for huge pages. + */ + if (tlb_type == cheetah_plus) { + unsigned long ctx; + + spin_lock(&ctx_alloc_lock); + ctx = mm->context.sparc64_ctx_val; + ctx &= ~CTX_PGSZ_MASK; + ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; + ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; + + if (ctx != mm->context.sparc64_ctx_val) { + /* When changing the page size fields, we + * must perform a context flush so that no + * stale entries match. This flush must + * occur with the original context register + * settings. + */ + do_flush_tlb_mm(mm); + + /* Reload the context register of all processors + * also executing in this address space. + */ + mm->context.sparc64_ctx_val = ctx; + on_each_cpu(context_reload, mm, 0, 0); + } + spin_unlock(&ctx_alloc_lock); + } + BUG_ON(vma->vm_start & ~HPAGE_MASK); BUG_ON(vma->vm_end & ~HPAGE_MASK); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 0e62b62c7dd44..89022ccaa75bb 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -61,7 +61,7 @@ static unsigned long bootmap_base; /* get_new_mmu_context() uses "cache + 1". 
*/ DEFINE_SPINLOCK(ctx_alloc_lock); unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; -#define CTX_BMAP_SLOTS (1UL << (CTX_VERSION_SHIFT - 6)) +#define CTX_BMAP_SLOTS (1UL << (CTX_NR_BITS - 6)) unsigned long mmu_context_bmap[CTX_BMAP_SLOTS]; /* References to special section boundaries */ @@ -85,40 +85,14 @@ void check_pgt_cache(void) preempt_disable(); if (pgtable_cache_size > PGT_CACHE_HIGH) { do { -#ifdef CONFIG_SMP if (pgd_quicklist) free_pgd_slow(get_pgd_fast()); -#endif if (pte_quicklist[0]) free_pte_slow(pte_alloc_one_fast(NULL, 0)); if (pte_quicklist[1]) free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10))); } while (pgtable_cache_size > PGT_CACHE_LOW); } -#ifndef CONFIG_SMP - if (pgd_cache_size > PGT_CACHE_HIGH / 4) { - struct page *page, *page2; - for (page2 = NULL, page = (struct page *)pgd_quicklist; page;) { - if ((unsigned long)page->lru.prev == 3) { - if (page2) - page2->lru.next = page->lru.next; - else - pgd_quicklist = (void *) page->lru.next; - pgd_cache_size -= 2; - __free_page(page); - if (page2) - page = (struct page *)page2->lru.next; - else - page = (struct page *)pgd_quicklist; - if (pgd_cache_size <= PGT_CACHE_LOW / 4) - break; - continue; - } - page2 = page; - page = (struct page *)page->lru.next; - } - } -#endif preempt_enable(); } @@ -135,7 +109,7 @@ __inline__ void flush_dcache_page_impl(struct page *page) atomic_inc(&dcpage_flushes); #endif -#if (L1DCACHE_SIZE > PAGE_SIZE) +#ifdef DCACHE_ALIASING_POSSIBLE __flush_dcache_page(page_address(page), ((tlb_type == spitfire) && page_mapping(page) != NULL)); @@ -158,15 +132,15 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) mask = (mask << 24) | (1UL << PG_dcache_dirty); __asm__ __volatile__("1:\n\t" "ldx [%2], %%g7\n\t" - "and %%g7, %1, %%g5\n\t" - "or %%g5, %0, %%g5\n\t" - "casx [%2], %%g7, %%g5\n\t" - "cmp %%g7, %%g5\n\t" + "and %%g7, %1, %%g1\n\t" + "or %%g1, %0, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" "bne,pn %%xcc, 1b\n\t" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) - : "g5", "g7"); + : "g1", "g7"); } static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu) @@ -176,20 +150,20 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c __asm__ __volatile__("! 
test_and_clear_dcache_dirty\n" "1:\n\t" "ldx [%2], %%g7\n\t" - "srlx %%g7, 24, %%g5\n\t" - "and %%g5, %3, %%g5\n\t" - "cmp %%g5, %0\n\t" + "srlx %%g7, 24, %%g1\n\t" + "and %%g1, %3, %%g1\n\t" + "cmp %%g1, %0\n\t" "bne,pn %%icc, 2f\n\t" - " andn %%g7, %1, %%g5\n\t" - "casx [%2], %%g7, %%g5\n\t" - "cmp %%g7, %%g5\n\t" + " andn %%g7, %1, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" "bne,pn %%xcc, 1b\n\t" " membar #StoreLoad | #StoreStore\n" "2:" : /* no outputs */ : "r" (cpu), "r" (mask), "r" (&page->flags), "i" (NR_CPUS - 1UL) - : "g5", "g7"); + : "g1", "g7"); } extern void __update_mmu_cache(unsigned long mmu_context_hw, unsigned long address, pte_t pte, int code); @@ -219,8 +193,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p put_cpu(); } + if (get_thread_fault_code()) - __update_mmu_cache(vma->vm_mm->context & TAG_CONTEXT_BITS, + __update_mmu_cache(CTX_NRBITS(vma->vm_mm->context), address, pte, get_thread_fault_code()); } @@ -281,9 +256,6 @@ void show_mem(void) printk("%ld pages of RAM\n", num_physpages); printk("%d free pages\n", nr_free_pages()); printk("%d pages in page table cache\n",pgtable_cache_size); -#ifndef CONFIG_SMP - printk("%d entries in page dir cache\n",pgd_cache_size); -#endif } void mmu_info(struct seq_file *m) @@ -392,10 +364,10 @@ static void inherit_prom_mappings(void) n = n / sizeof(*trans); /* - * The obp translations are saved based on 8k pagesize, since obp can use - * a mixture of pagesizes. Misses to the 0xf0000000 - 0x100000000, ie obp - * range, are handled in entry.S and do not use the vpte scheme (see rant - * in inherit_locked_prom_mappings()). + * The obp translations are saved based on 8k pagesize, since obp can + * use a mixture of pagesizes. Misses to the 0xf0000000 - 0x100000000, + * ie obp range, are handled in entry.S and do not use the vpte scheme + * (see rant in inherit_locked_prom_mappings()). */ #define OBP_PMD_SIZE 2048 prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, bootmap_base); @@ -449,11 +421,15 @@ static void inherit_prom_mappings(void) prom_printf("Remapping the kernel... "); /* Spitfire Errata #32 workaround */ + /* NOTE: Using plain zero for the context value is + * correct here, we are not using the Linux trap + * tables yet so we should not use the special + * UltraSPARC-III+ page size encodings yet. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + : "r" (0), "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); switch (tlb_type) { default: @@ -513,6 +489,11 @@ static void inherit_prom_mappings(void) tte_vaddr = (unsigned long) KERNBASE; /* Spitfire Errata #32 workaround */ + /* NOTE: Using plain zero for the context value is + * correct here, we are not using the Linux trap + * tables yet so we should not use the special + * UltraSPARC-III+ page size encodings yet. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -531,6 +512,11 @@ static void inherit_prom_mappings(void) /* Spitfire Errata #32 workaround */ + /* NOTE: Using plain zero for the context value is + * correct here, we are not using the Linux trap + * tables yet so we should not use the special + * UltraSPARC-III+ page size encodings yet. 
+ */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -617,6 +603,9 @@ static void __flush_nucleus_vptes(void) unsigned long tag; /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no cheetah+ + * page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -783,6 +772,9 @@ void inherit_locked_prom_mappings(int save_p) unsigned long data; /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no cheetah+ + * page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -794,6 +786,9 @@ void inherit_locked_prom_mappings(int save_p) unsigned long tag; /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -821,6 +816,9 @@ void inherit_locked_prom_mappings(int save_p) unsigned long data; /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -832,6 +830,9 @@ void inherit_locked_prom_mappings(int save_p) unsigned long tag; /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -947,6 +948,7 @@ void prom_reload_locked(void) } } +#ifdef DCACHE_ALIASING_POSSIBLE void __flush_dcache_range(unsigned long start, unsigned long end) { unsigned long va; @@ -970,6 +972,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end) "i" (ASI_DCACHE_INVALIDATE)); } } +#endif /* DCACHE_ALIASING_POSSIBLE */ /* If not locked, zap it. */ void __flush_tlb_all(void) @@ -985,6 +988,9 @@ void __flush_tlb_all(void) if (tlb_type == spitfire) { for (i = 0; i < 64; i++) { /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -1000,6 +1006,9 @@ void __flush_tlb_all(void) } /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. 
+ */ __asm__ __volatile__("stxa %0, [%1] %2\n\t" "flush %%g6" : /* No outputs */ @@ -1033,11 +1042,14 @@ void __flush_tlb_all(void) void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; + unsigned long orig_pgsz_bits; + spin_lock(&ctx_alloc_lock); - ctx = CTX_HWBITS(tlb_context_cache + 1); - new_ctx = find_next_zero_bit(mmu_context_bmap, 1UL << CTX_VERSION_SHIFT, ctx); - if (new_ctx >= (1UL << CTX_VERSION_SHIFT)) { + orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); + ctx = (tlb_context_cache + 1) & CTX_NR_MASK; + new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); + if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { int i; @@ -1066,9 +1078,8 @@ void get_new_mmu_context(struct mm_struct *mm) new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); out: tlb_context_cache = new_ctx; + mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; spin_unlock(&ctx_alloc_lock); - - mm->context = new_ctx; } #ifndef CONFIG_SMP @@ -1087,7 +1098,7 @@ struct pgtable_cache_struct pgt_quicklists; * using the later address range, accesses with the first address * range will see the newly initialized data rather than the garbage. */ -#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */ +#ifdef DCACHE_ALIASING_POSSIBLE #define DC_ALIAS_SHIFT 1 #else #define DC_ALIAS_SHIFT 0 @@ -1111,7 +1122,7 @@ pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) unsigned long paddr; pte_t *pte; -#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */ +#ifdef DCACHE_ALIASING_POSSIBLE set_page_count(page, 1); ClearPageCompound(page); @@ -1129,7 +1140,7 @@ pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) to_free = (unsigned long *) paddr; } -#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */ +#ifdef DCACHE_ALIASING_POSSIBLE /* Now free the other one up, adjust cache size. 
*/ preempt_disable(); *to_free = (unsigned long) pte_quicklist[color ^ 0x1]; @@ -1702,22 +1713,6 @@ void __init mem_init(void) initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin)); initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; -#ifndef CONFIG_SMP - { - /* Put empty_pg_dir on pgd_quicklist */ - extern pgd_t empty_pg_dir[1024]; - unsigned long addr = (unsigned long)empty_pg_dir; - unsigned long alias_base = kern_base + PAGE_OFFSET - - (long)(KERNBASE); - - memset(empty_pg_dir, 0, sizeof(empty_pg_dir)); - addr += alias_base; - free_pgd_fast((pgd_t *)addr); - num_physpages++; - totalram_pages++; - } -#endif - printk("Memory: %uk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n", nr_free_pages() << (PAGE_SHIFT-10), codepages << (PAGE_SHIFT-10), diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c index 6255d6ef48eb0..90ca99d0b89cd 100644 --- a/arch/sparc64/mm/tlb.c +++ b/arch/sparc64/mm/tlb.c @@ -26,15 +26,13 @@ void flush_tlb_pending(void) struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); if (mp->tlb_nr) { - unsigned long context = mp->mm->context; - - if (CTX_VALID(context)) { + if (CTX_VALID(mp->mm->context)) { #ifdef CONFIG_SMP smp_flush_tlb_pending(mp->mm, mp->tlb_nr, &mp->vaddrs[0]); #else - __flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr, - &mp->vaddrs[0]); + __flush_tlb_pending(CTX_HWBITS(mp->mm->context), + mp->tlb_nr, &mp->vaddrs[0]); #endif } mp->tlb_nr = 0; @@ -73,6 +71,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t } no_cache_flush: + if (mp->tlb_frozen) return; @@ -101,11 +100,10 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long if (mp->tlb_frozen) return; - /* Nobody should call us with start below VM hole and end above. - * See if it is really true. - */ - BUG_ON(s > e); + /* If start is greater than end, that is a real problem. */ + BUG_ON(start > end); + /* However, straddling the VA space hole is quite normal. */ s &= PMD_MASK; e = (e + PMD_SIZE - 1) & PMD_MASK; @@ -123,6 +121,22 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long start = vpte_base + (s >> (PAGE_SHIFT - 3)); end = vpte_base + (e >> (PAGE_SHIFT - 3)); + + /* If the request straddles the VA space hole, we + * need to swap start and end. The reason this + * occurs is that "vpte_base" is the center of + * the linear page table mapping area. Thus, + * high addresses with the sign bit set map to + * addresses below vpte_base and non-sign bit + * addresses map to addresses above vpte_base. + */ + if (end < start) { + unsigned long tmp = start; + + start = end; + end = tmp; + } + while (start < end) { mp->vaddrs[nr] = start; mp->tlb_nr = ++nr; @@ -135,10 +149,3 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long if (nr) flush_tlb_pending(); } - -unsigned long __ptrs_per_pmd(void) -{ - if (test_thread_flag(TIF_32BIT)) - return (1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT)); - return REAL_PTRS_PER_PMD; -} diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index af8205edfbd0f..7a0934321010a 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -13,6 +13,7 @@ #include <asm/pil.h> #include <asm/head.h> #include <asm/thread_info.h> +#include <asm/cacheflush.h> /* Basically, most of the Spitfire vs. 
Cheetah madness * has to do with the fact that Cheetah does not support @@ -49,9 +50,9 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ .globl __flush_tlb_pending __flush_tlb_pending: /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ - rdpr %pstate, %g5 + rdpr %pstate, %g7 sllx %o1, 3, %o1 - andn %g5, PSTATE_IE, %g2 + andn %g7, PSTATE_IE, %g2 wrpr %g2, %pstate mov SECONDARY_CONTEXT, %o4 ldxa [%o4] ASI_DMMU, %g2 @@ -70,7 +71,7 @@ __flush_tlb_pending: stxa %g2, [%o4] ASI_DMMU flush %g6 retl - wrpr %g5, 0x0, %pstate + wrpr %g7, 0x0, %pstate .align 32 .globl __flush_tlb_kernel_range @@ -114,64 +115,27 @@ __spitfire_flush_tlb_mm_slow: .align 32 .globl __flush_icache_page __flush_icache_page: /* %o0 = phys_page */ - sethi %hi(1 << 13), %o2 ! IC_set bit - mov 1, %g1 - srlx %o0, 5, %o0 - clr %o1 ! IC_addr - sllx %g1, 36, %g1 - ldda [%o1] ASI_IC_TAG, %o4 - sub %g1, 1, %g2 - or %o0, %g1, %o0 ! VALID+phys-addr comparator - - sllx %g2, 1, %g2 - andn %g2, ITAG_MASK, %g2 ! IC_tag mask - nop - nop - nop - nop - nop - nop - -1: addx %g0, %g0, %g0 - ldda [%o1 + %o2] ASI_IC_TAG, %g4 - addx %g0, %g0, %g0 - and %o5, %g2, %g3 - cmp %g3, %o0 - add %o1, 0x20, %o1 - ldda [%o1] ASI_IC_TAG, %o4 - be,pn %xcc, iflush1 - -2: nop - and %g5, %g2, %g5 - cmp %g5, %o0 - be,pn %xcc, iflush2 -3: cmp %o1, %o2 - bne,pt %xcc, 1b - addx %g0, %g0, %g0 - nop - + membar #StoreStore + srlx %o0, PAGE_SHIFT, %o0 + sethi %uhi(PAGE_OFFSET), %g1 + sllx %o0, PAGE_SHIFT, %o0 + sethi %hi(PAGE_SIZE), %g2 + sllx %g1, 32, %g1 + add %o0, %g1, %o0 +1: subcc %g2, 32, %g2 + bne,pt %icc, 1b + flush %o0 + %g2 retl - ldx [%g6 + TI_TASK], %g4 + nop -iflush1:sub %o1, 0x20, %g3 - stxa %g0, [%g3] ASI_IC_TAG - flush %g6 - ba,a,pt %xcc, 2b -iflush2:sub %o1, 0x20, %g3 - stxa %g0, [%o1 + %o2] ASI_IC_TAG - flush %g6 - ba,a,pt %xcc, 3b +#ifdef DCACHE_ALIASING_POSSIBLE -#if (PAGE_SHIFT == 13) -#define DTAG_MASK 0x3 -#elif (PAGE_SHIFT == 16) -#define DTAG_MASK 0x1f -#elif (PAGE_SHIFT == 19) -#define DTAG_MASK 0xff -#elif (PAGE_SHIFT == 22) -#define DTAG_MASK 0x3ff +#if (PAGE_SHIFT != 13) +#error only page shift of 13 is supported by dcache flush #endif +#define DTAG_MASK 0x3 + .align 64 .globl __flush_dcache_page __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ @@ -228,6 +192,7 @@ dflush4:stxa %g0, [%o4] ASI_DCACHE_TAG membar #Sync ba,pt %xcc, 2b nop +#endif /* DCACHE_ALIASING_POSSIBLE */ .align 32 __prefill_dtlb: @@ -258,10 +223,18 @@ __update_mmu_cache: /* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */ or %o5, %o0, %o5 ba,a,pt %xcc, __prefill_itlb - /* Cheetah specific versions, patched at boot time. */ + /* Cheetah specific versions, patched at boot time. + * + * The writes to the PRIMARY_CONTEXT register in this file are + * safe even on Cheetah+ and later wrt. the page size fields. + * The nucleus page size fields do not matter because we make + * no data references, and these instructions execute out of a + * locked I-TLB entry sitting in the fully associative I-TLB. + * This sequence should also never trap.
+ */ __cheetah_flush_tlb_mm: /* 15 insns */ - rdpr %pstate, %g5 - andn %g5, PSTATE_IE, %g2 + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 wrpr %g2, 0x0, %pstate wrpr %g0, 1, %tl mov PRIMARY_CONTEXT, %o2 @@ -274,13 +247,13 @@ __cheetah_flush_tlb_mm: /* 15 insns */ flush %g6 wrpr %g0, 0, %tl retl - wrpr %g5, 0x0, %pstate + wrpr %g7, 0x0, %pstate __cheetah_flush_tlb_pending: /* 22 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ - rdpr %pstate, %g5 + rdpr %pstate, %g7 sllx %o1, 3, %o1 - andn %g5, PSTATE_IE, %g2 + andn %g7, PSTATE_IE, %g2 wrpr %g2, 0x0, %pstate wrpr %g0, 1, %tl mov PRIMARY_CONTEXT, %o4 @@ -299,8 +272,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */ flush %g6 wrpr %g0, 0, %tl retl - wrpr %g5, 0x0, %pstate + wrpr %g7, 0x0, %pstate +#ifdef DCACHE_ALIASING_POSSIBLE flush_dcpage_cheetah: /* 11 insns */ sethi %uhi(PAGE_OFFSET), %g1 sllx %g1, 32, %g1 @@ -313,6 +287,7 @@ flush_dcpage_cheetah: /* 11 insns */ nop retl /* I-cache flush never needed on Cheetah, see callers. */ nop +#endif /* DCACHE_ALIASING_POSSIBLE */ cheetah_patch_one: 1: lduw [%o1], %g1 @@ -343,12 +318,14 @@ cheetah_patch_cachetlbops: call cheetah_patch_one mov 22, %o2 +#ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 or %o0, %lo(__flush_dcache_page), %o0 sethi %hi(flush_dcpage_cheetah), %o1 or %o1, %lo(flush_dcpage_cheetah), %o1 call cheetah_patch_one mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ ret restore @@ -464,6 +441,7 @@ xcall_report_regs: b rtrap_xcall ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 +#ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */ @@ -475,12 +453,13 @@ xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */ nop retry nop +#endif /* DCACHE_ALIASING_POSSIBLE */ .globl xcall_flush_dcache_page_spitfire xcall_flush_dcache_page_spitfire: /* %g1 == physical page address %g7 == kernel page virtual address %g5 == (page->mapping != NULL) */ -#if (L1DCACHE_SIZE > PAGE_SIZE) +#ifdef DCACHE_ALIASING_POSSIBLE srlx %g1, (13 - 2), %g1 ! Form tag comparator sethi %hi(L1DCACHE_SIZE), %g3 ! D$ size == 16K sub %g3, (1 << 5), %g3 ! D$ linesize == 32 @@ -499,7 +478,7 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address sub %g3, (1 << 5), %g3 brz,pn %g5, 2f -#endif /* L1DCACHE_SIZE > PAGE_SIZE */ +#endif /* DCACHE_ALIASING_POSSIBLE */ sethi %hi(PAGE_SIZE), %g3 1: flush %g7 diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c index 9eab4421e1e4c..59fe38bba39e8 100644 --- a/arch/sparc64/prom/p1275.c +++ b/arch/sparc64/prom/p1275.c @@ -30,6 +30,16 @@ extern void prom_world(int); extern void prom_cif_interface(void); extern void prom_cif_callback(void); +static inline unsigned long spitfire_get_primary_context(void) +{ + unsigned long ctx; + + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=r" (ctx) + : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + return ctx; +} + /* * This provides SMP safety on the p1275buf. prom_callback() drops this lock * to allow recursive acquisition. */ @@ -43,14 +53,9 @@ long p1275_cmd (char *service, long fmt, ...) int nargs, nrets, i; va_list list; long attrs, x; - long ctx = 0; p = p1275buf.prom_buffer; - ctx = spitfire_get_primary_context (); - if (ctx) { - flushw_user (); - spitfire_set_primary_context (0); - } + BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0); spin_lock_irqsave(&prom_entry_lock, flags); @@ -146,9 +151,6 @@ long p1275_cmd (char *service, long fmt, ...)
spin_unlock_irqrestore(&prom_entry_lock, flags); - if (ctx) - spitfire_set_primary_context (ctx); - return x; } diff --git a/include/asm-parisc/unaligned.h b/include/asm-parisc/unaligned.h index 0896a9f66529d..53c905838d933 100644 --- a/include/asm-parisc/unaligned.h +++ b/include/asm-parisc/unaligned.h @@ -1,7 +1,7 @@ #ifndef _ASM_PARISC_UNALIGNED_H_ #define _ASM_PARISC_UNALIGNED_H_ -#include <asm-parisc/unaligned.h> +#include <asm-generic/unaligned.h> #ifdef __KERNEL__ struct pt_regs; diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h index f1f8661cf83a8..86f02937ff1b7 100644 --- a/include/asm-sparc64/cacheflush.h +++ b/include/asm-sparc64/cacheflush.h @@ -2,6 +2,17 @@ #define _SPARC64_CACHEFLUSH_H #include <linux/config.h> +#include <asm/page.h> + +/* Flushing for D-cache alias handling is only needed if + * the page size is smaller than 16K. + */ +#if PAGE_SHIFT < 14 +#define DCACHE_ALIASING_POSSIBLE +#endif + +#ifndef __ASSEMBLY__ + #include <linux/mm.h> /* Cache flush operations. */ @@ -20,9 +31,9 @@ * module load, so we need this. */ extern void flush_icache_range(unsigned long start, unsigned long end); +extern void __flush_icache_page(unsigned long); extern void __flush_dcache_page(void *addr, int flush_icache); -extern void __flush_icache_page(unsigned long); extern void flush_dcache_page_impl(struct page *page); #ifdef CONFIG_SMP extern void smp_flush_dcache_page_impl(struct page *page, int cpu); @@ -33,6 +44,7 @@ extern void flush_dcache_page_all(struct mm_struct *mm, struct page *page); #endif extern void __flush_dcache_range(unsigned long start, unsigned long end); +extern void flush_dcache_page(struct page *page); #define flush_icache_page(vma, pg) do { } while(0) #define flush_icache_user_range(vma,pg,adr,len) do { } while (0) @@ -49,11 +61,12 @@ extern void __flush_dcache_range(unsigned long start, unsigned long end); memcpy(dst, src, len); \ } while (0) -extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#endif /* !__ASSEMBLY__ */ + #endif /* _SPARC64_CACHEFLUSH_H */ diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index d7625ffc0b85a..b1f1f4e4bd4c6 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -19,7 +19,7 @@ typedef struct { /* Dcache line 2 */ unsigned int pgcache_size; - unsigned int pgdcache_size; + unsigned int __pad1; unsigned long *pte_cache[2]; unsigned long *pgd_cache; } cpuinfo_sparc; diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h index 6b327402277fd..4c1098474c73f 100644 --- a/include/asm-sparc64/ide.h +++ b/include/asm-sparc64/ide.h @@ -13,8 +13,8 @@ #include <linux/config.h> #include <asm/pgalloc.h> #include <asm/io.h> -#include <asm/page.h> #include <asm/spitfire.h> +#include <asm/cacheflush.h> #ifndef MAX_HWIFS # ifdef CONFIG_BLK_DEV_IDEPCI @@ -51,7 +51,7 @@ static inline unsigned int inw_be(void __iomem *addr) static inline void __ide_insw(void __iomem *port, void *dst, u32 count) { -#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */ +#ifdef DCACHE_ALIASING_POSSIBLE unsigned long end = (unsigned long)dst + (count << 1); #endif u16 *ps = dst; @@ -74,7 +74,7 @@ static inline void __ide_insw(void __iomem *port, void *dst, u32 count) if(count) *ps++ = inw_be(port); -#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ 
diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h
index 6b327402277fd..4c1098474c73f 100644
--- a/include/asm-sparc64/ide.h
+++ b/include/asm-sparc64/ide.h
@@ -13,8 +13,8 @@
#include <linux/config.h>
#include <asm/pgalloc.h>
#include <asm/io.h>
-#include <asm/page.h>
#include <asm/spitfire.h>
+#include <asm/cacheflush.h>

#ifndef MAX_HWIFS
# ifdef CONFIG_BLK_DEV_IDEPCI
@@ -51,7 +51,7 @@ static inline unsigned int inw_be(void __iomem *addr)

static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
{
-#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */
+#ifdef DCACHE_ALIASING_POSSIBLE
	unsigned long end = (unsigned long)dst + (count << 1);
#endif
	u16 *ps = dst;
@@ -74,7 +74,7 @@ static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
	if(count)
		*ps++ = inw_be(port);

-#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */
+#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_range((unsigned long)dst, end);
#endif
}
@@ -88,7 +88,7 @@ static inline void outw_be(unsigned short w, void __iomem *addr)

static inline void __ide_outsw(void __iomem *port, void *src, u32 count)
{
-#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */
+#ifdef DCACHE_ALIASING_POSSIBLE
	unsigned long end = (unsigned long)src + (count << 1);
#endif
	const u16 *ps = src;
@@ -111,7 +111,7 @@ static inline void __ide_outsw(void __iomem *port, void *src, u32 count)
	if(count)
		outw_be(*ps, port);

-#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */
+#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_range((unsigned long)src, end);
#endif
}
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index ccd36d26615a7..8627eed6e83df 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -1,7 +1,99 @@
#ifndef __MMU_H
#define __MMU_H

-/* Default "unsigned long" context */
-typedef unsigned long mm_context_t;
+#include <linux/config.h>
+#include <asm/page.h>
+#include <asm/const.h>
+
+/*
+ * For the 8k pagesize kernel, use only 10 hw context bits to optimize some
+ * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically
+ * for vpte offset calculation). For other pagesizes, this optimization in
+ * the tlbhandlers cannot be done; but still, all 13 bits cannot be used
+ * because the tlb handlers use the "andcc" instruction, which sign extends
+ * 13 bit arguments.
+ */
+#if PAGE_SHIFT == 13
+#define CTX_NR_BITS		10
+#else
+#define CTX_NR_BITS		12
#endif
+
+#define TAG_CONTEXT_BITS	((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
+
+/* UltraSPARC-III+ and later have a feature whereby you can
+ * select what page size the various Data-TLB instances in the
+ * chip use. In order to gracefully support this, we put the version
+ * field in a spot outside of the areas of the context register
+ * where this parameter is specified.
+ */
+#define CTX_VERSION_SHIFT	22
+#define CTX_VERSION_MASK	((~0UL) << CTX_VERSION_SHIFT)
+
+#define CTX_PGSZ_8KB		_AC(0x0,UL)
+#define CTX_PGSZ_64KB		_AC(0x1,UL)
+#define CTX_PGSZ_512KB		_AC(0x2,UL)
+#define CTX_PGSZ_4MB		_AC(0x3,UL)
+#define CTX_PGSZ_BITS		_AC(0x7,UL)
+#define CTX_PGSZ0_NUC_SHIFT	61
+#define CTX_PGSZ1_NUC_SHIFT	58
+#define CTX_PGSZ0_SHIFT		16
+#define CTX_PGSZ1_SHIFT		19
+#define CTX_PGSZ_MASK		((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
+				 (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
+
+#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
+#define CTX_PGSZ_BASE	CTX_PGSZ_8KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
+#define CTX_PGSZ_BASE	CTX_PGSZ_64KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
+#define CTX_PGSZ_BASE	CTX_PGSZ_512KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
+#define CTX_PGSZ_BASE	CTX_PGSZ_4MB
+#else
+#error No page size specified in kernel configuration
+#endif
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#define CTX_PGSZ_HUGE	CTX_PGSZ_4MB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+#define CTX_PGSZ_HUGE	CTX_PGSZ_512KB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+#define CTX_PGSZ_HUGE	CTX_PGSZ_64KB
+#endif
+
+#define CTX_PGSZ_KERN	CTX_PGSZ_4MB
+
+/* Thus, when running on UltraSPARC-III+ and later, we use the following
+ * PRIMARY_CONTEXT register values for the kernel context.
+ */
+#define CTX_CHEETAH_PLUS_NUC \
+	((CTX_PGSZ_KERN << CTX_PGSZ0_NUC_SHIFT) | \
+	 (CTX_PGSZ_BASE << CTX_PGSZ1_NUC_SHIFT))
+
+#define CTX_CHEETAH_PLUS_CTX0 \
+	((CTX_PGSZ_KERN << CTX_PGSZ0_SHIFT) | \
+	 (CTX_PGSZ_BASE << CTX_PGSZ1_SHIFT))
+
+/* If you want "the TLB context number" use CTX_NR_MASK. If you
+ * want "the bits I program into the context registers" use
+ * CTX_HW_MASK.
+ */
+#define CTX_NR_MASK		TAG_CONTEXT_BITS
+#define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
+
+#define CTX_FIRST_VERSION	((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_VALID(__ctx) \
+	(!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
+#define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
+#define CTX_NRBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned long	sparc64_ctx_val;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __MMU_H */
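[Editor's note] A concrete reading of the new context-register macros may help. With the default CONFIG_SPARC64_PAGE_SIZE_8KB, CTX_PGSZ_BASE is CTX_PGSZ_8KB (0x0) and CTX_PGSZ_KERN is CTX_PGSZ_4MB (0x3). The arithmetic below is my own worked example, not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned long pgsz_kern = 0x3UL;	/* CTX_PGSZ_4MB */
	unsigned long pgsz_base = 0x0UL;	/* CTX_PGSZ_8KB */

	/* CTX_CHEETAH_PLUS_NUC: nucleus page-size fields, bits 61 and 58 */
	unsigned long nuc  = (pgsz_kern << 61) | (pgsz_base << 58);

	/* CTX_CHEETAH_PLUS_CTX0: context-0 page-size fields, bits 16 and 19 */
	unsigned long ctx0 = (pgsz_kern << 16) | (pgsz_base << 19);

	printf("NUC  = %#lx\n", nuc);	/* 0x6000000000000000 */
	printf("CTX0 = %#lx\n", ctx0);	/* 0x30000 */
	return 0;
}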
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 08275bc3478ac..87c43c67866e9 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -4,23 +4,6 @@

/* Derived heavily from Linus's Alpha/AXP ASN code... */

-#include <asm/page.h>
-
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some shifts in
- * the fast tlbmiss handlers, instead of all 13 bits (specifically for vpte offset
- * calculation). For other pagesizes, this optimization in the tlbhandlers can not be
- * done; but still, all 13 bits can not be used because the tlb handlers use "andcc"
- * instruction which sign extends 13 bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_VERSION_SHIFT 10
-#define TAG_CONTEXT_BITS 0x3ff
-#else
-#define CTX_VERSION_SHIFT 12
-#define TAG_CONTEXT_BITS 0xfff
-#endif
-
#ifndef __ASSEMBLY__

#include <linux/spinlock.h>
@@ -35,19 +18,14 @@ extern spinlock_t ctx_alloc_lock;
extern unsigned long tlb_context_cache;
extern unsigned long mmu_context_bmap[];

-#define CTX_VERSION_MASK ((~0UL) << CTX_VERSION_SHIFT)
-#define CTX_FIRST_VERSION ((1UL << CTX_VERSION_SHIFT) + 1UL)
-#define CTX_VALID(__ctx) \
-	(!(((__ctx) ^ tlb_context_cache) & CTX_VERSION_MASK))
-#define CTX_HWBITS(__ctx)	((__ctx) & ~CTX_VERSION_MASK)
-
extern void get_new_mmu_context(struct mm_struct *mm);

/* Initialize a new mmu context. This is invoked when a new
 * address space instance (unique or shared) is instantiated.
 * This just needs to set mm->context to an invalid context.
 */
-#define init_new_context(__tsk, __mm)	(((__mm)->context = 0UL), 0)
+#define init_new_context(__tsk, __mm) \
+	(((__mm)->context.sparc64_ctx_val = 0UL), 0)

/* Destroy a dead context. This occurs when mmput drops the
 * mm_users count to zero, the mmaps have been released, and
@@ -59,7 +37,7 @@ extern void get_new_mmu_context(struct mm_struct *mm);
#define destroy_context(__mm) \
do {	spin_lock(&ctx_alloc_lock); \
	if (CTX_VALID((__mm)->context)) { \
-		unsigned long nr = CTX_HWBITS((__mm)->context); \
+		unsigned long nr = CTX_NRBITS((__mm)->context); \
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
	} \
	spin_unlock(&ctx_alloc_lock); \
@@ -101,7 +79,7 @@ do { \
	     "flush		%%g6" \
	     : /* No outputs */ \
	     : "r" (CTX_HWBITS((__mm)->context)), \
-	       "r" (0x10), "i" (ASI_DMMU))
+	       "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU))

extern void __flush_tlb_mm(unsigned long, unsigned long);

@@ -135,7 +113,8 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
	 */
	if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
		cpu_set(cpu, mm->cpu_vm_mask);
-		__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
+		__flush_tlb_mm(CTX_HWBITS(mm->context),
+			       SECONDARY_CONTEXT);
	}
	}
	spin_unlock(&mm->page_table_lock);
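[Editor's note] The destroy_context() change swaps CTX_HWBITS for CTX_NRBITS because the hardware bits now also carry page-size fields; only the bare context number indexes the allocation bitmap. A standalone sketch of that bitmap arithmetic (sizes and the ctx_free name are illustrative, not from the patch):

#include <stdio.h>

/* Context number nr lives in a bitmap of 64-bit words:
 * nr >> 6 selects the word, nr & 63 selects the bit.
 */
static unsigned long mmu_context_bmap[1 << (13 - 6)];

static void ctx_free(unsigned long nr)
{
	mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
}

int main(void)
{
	mmu_context_bmap[1] = ~0UL;
	ctx_free(67);	/* word 1, bit 3 */
	printf("%#lx\n", mmu_context_bmap[1]);	/* 0xfffffffffffffff7 */
	return 0;
}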
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index c3dc444563e07..219ea043a14a8 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -6,7 +6,18 @@
#include <linux/config.h>
#include <asm/const.h>

+#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define PAGE_SHIFT   13
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
+#define PAGE_SHIFT   16
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
+#define PAGE_SHIFT   19
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
+#define PAGE_SHIFT   22
+#else
+#error No page size specified in kernel configuration
+#endif
+
#define PAGE_SIZE    (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK    (~(PAGE_SIZE-1))
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index 167d514bdf6ee..88f9c142947cf 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -9,6 +9,7 @@

#include <asm/spitfire.h>
#include <asm/cpudata.h>
+#include <asm/cacheflush.h>

/* Page table allocation/freeing. */
#ifdef CONFIG_SMP
@@ -19,74 +20,12 @@ extern struct pgtable_cache_struct {
	unsigned long *pgd_cache;
	unsigned long *pte_cache[2];
	unsigned int pgcache_size;
-	unsigned int pgdcache_size;
} pgt_quicklists;
#endif
#define pgd_quicklist		(pgt_quicklists.pgd_cache)
#define pmd_quicklist		((unsigned long *)0)
#define pte_quicklist		(pgt_quicklists.pte_cache)
#define pgtable_cache_size	(pgt_quicklists.pgcache_size)
-#define pgd_cache_size		(pgt_quicklists.pgdcache_size)
-
-#ifndef CONFIG_SMP
-
-static __inline__ void free_pgd_fast(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	preempt_disable();
-	if (!page->lru.prev) {
-		page->lru.next = (void *) pgd_quicklist;
-		pgd_quicklist = (unsigned long *)page;
-	}
-	page->lru.prev = (void *)
-	  (((unsigned long)page->lru.prev) |
-	   (((unsigned long)pgd & (PAGE_SIZE / 2)) ? 2 : 1));
-	pgd_cache_size++;
-	preempt_enable();
-}
-
-static __inline__ pgd_t *get_pgd_fast(void)
-{
-	struct page *ret;
-
-	preempt_disable();
-	if ((ret = (struct page *)pgd_quicklist) != NULL) {
-		unsigned long mask = (unsigned long)ret->lru.prev;
-		unsigned long off = 0;
-
-		if (mask & 1)
-			mask &= ~1;
-		else {
-			off = PAGE_SIZE / 2;
-			mask &= ~2;
-		}
-		ret->lru.prev = (void *) mask;
-		if (!mask)
-			pgd_quicklist = (unsigned long *)ret->lru.next;
-		ret = (struct page *)(__page_address(ret) + off);
-		pgd_cache_size--;
-		preempt_enable();
-	} else {
-		struct page *page;
-
-		preempt_enable();
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-		if (page) {
-			ret = (struct page *)page_address(page);
-			page->lru.prev = (void *) 2UL;
-
-			preempt_disable();
-			page->lru.next = (void *) pgd_quicklist;
-			pgd_quicklist = (unsigned long *)page;
-			pgd_cache_size++;
-			preempt_enable();
-		}
-	}
-	return (pgd_t *)ret;
-}
-
-#else /* CONFIG_SMP */

static __inline__ void free_pgd_fast(pgd_t *pgd)
{
@@ -121,9 +60,7 @@ static __inline__ void free_pgd_slow(pgd_t *pgd)
	free_page((unsigned long)pgd);
}

-#endif /* CONFIG_SMP */
-
-#if (L1DCACHE_SIZE > PAGE_SIZE) /* is there D$ aliasing problem */
+#ifdef DCACHE_ALIASING_POSSIBLE
#define VPTE_COLOR(address)		(((address) >> (PAGE_SHIFT + 10)) & 1UL)
#define DCACHE_COLOR(address)		(((address) >> PAGE_SHIFT) & 1UL)
#else
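[Editor's note] For reference, the four selectable PAGE_SHIFT values in the new page.h map to page sizes as follows. This is simple arithmetic of mine, not part of the patch:

#include <stdio.h>

int main(void)
{
	/* PAGE_SHIFT values from the new asm-sparc64/page.h */
	int shifts[] = { 13, 16, 19, 22 };
	const char *names[] = { "8KB", "64KB", "512KB", "4MB" };
	int i;

	for (i = 0; i < 4; i++)
		printf("%-5s -> PAGE_SIZE = %lu bytes\n",
		       names[i], 1UL << shifts[i]);
	return 0;
}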
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index dfb8a88863186..ca04ac105b694 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -60,44 +60,24 @@
#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3))
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))
-#define PMD_BITS	11
+#define PMD_BITS	(PAGE_SHIFT - 2)

/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PGDIR_BITS	(PAGE_SHIFT - 2)

#ifndef __ASSEMBLY__

#include <linux/sched.h>

/* Entries per page directory level. */
-#define PTRS_PER_PTE		(1UL << (PAGE_SHIFT-3))
-
-/* We the first one in this file, what we export to the kernel
- * is different so we can optimize correctly for 32-bit tasks.
- */
-#define REAL_PTRS_PER_PMD	(1UL << PMD_BITS)
-
-/* This is gross, but unless we do this gcc retests the
- * thread flag every interation in pmd traversal loops.
- */
-extern unsigned long __ptrs_per_pmd(void) __attribute_const__;
-#define PTRS_PER_PMD		__ptrs_per_pmd()
-
-/*
- * We cannot use the top address range because VPTE table lives there. This
- * formula finds the total legal virtual space in the processor, subtracts the
- * vpte size, then aligns it to the number of bytes mapped by one pgde, and
- * thus calculates the number of pgdes needed.
- */
-#define PTRS_PER_PGD (((1UL << VA_BITS) - VPTE_SIZE + (1UL << (PAGE_SHIFT + \
-	(PAGE_SHIFT-3) + PMD_BITS)) - 1) / (1UL << (PAGE_SHIFT + \
-	(PAGE_SHIFT-3) + PMD_BITS)))
+#define PTRS_PER_PTE	(1UL << (PAGE_SHIFT-3))
+#define PTRS_PER_PMD	(1UL << PMD_BITS)
+#define PTRS_PER_PGD	(1UL << PGDIR_BITS)

/* Kernel has a separate 44bit address space. */
-#define USER_PTRS_PER_PGD	((const int)(test_thread_flag(TIF_32BIT)) ? \
-				 (1) : (PTRS_PER_PGD))
#define FIRST_USER_PGD_NR	0

#define pte_ERROR(e)	__builtin_trap()
@@ -236,8 +216,8 @@ extern struct page *mem_map_zero;

/* PFNs are real physical page numbers. However, mem_map only begins to record
 * per-page information starting at pfn_base. This is to handle systems where
- * the first physical page in the machine is at some huge physical address, such
- * as 4GB. This is common on a partitioned E10000, for example.
+ * the first physical page in the machine is at some huge physical address,
+ * such as 4GB. This is common on a partitioned E10000, for example.
 */

#define pfn_pte(pfn, prot) \
@@ -308,7 +288,7 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W))

/* to find an entry in a page-table-directory. */
-#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD))
+#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))

/* to find an entry in a kernel page-table-directory */
@@ -322,7 +302,7 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address)	\
	((pmd_t *) pud_page(*(pudp)) + \
-	 (((address) >> PMD_SHIFT) & (REAL_PTRS_PER_PMD-1)))
+	 (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)))

/* Find an entry in the third-level page table.. */
#define pte_index(dir, address)	\
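[Editor's note] The pgd_index() hunk above is a genuine fix, not just cleanup. For a power-of-two table, masking with the size keeps only the single bit that happens to equal the size, while masking with size - 1 keeps the low-order index bits, i.e. a proper modulo. A quick demonstration of mine (the 2048-entry size is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long ptrs_per_pgd = 1UL << 11;	/* 2048 entries */
	unsigned long index = 1234;

	/* Old, buggy mask: collapses almost every index to 0. */
	printf("index & size     = %lu\n", index & ptrs_per_pgd);	/* 0 */

	/* Fixed mask: modulo for power-of-two sizes. */
	printf("index & (size-1) = %lu\n", index & (ptrs_per_pgd - 1));	/* 1234 */
	return 0;
}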
diff --git a/include/asm-sparc64/rwsem-const.h b/include/asm-sparc64/rwsem-const.h
new file mode 100644
index 0000000000000..a303c9d64d845
--- /dev/null
+++ b/include/asm-sparc64/rwsem-const.h
@@ -0,0 +1,12 @@
+/* rwsem-const.h: RW semaphore counter constants. */
+#ifndef _SPARC64_RWSEM_CONST_H
+#define _SPARC64_RWSEM_CONST_H
+
+#define RWSEM_UNLOCKED_VALUE		0x00000000
+#define RWSEM_ACTIVE_BIAS		0x00000001
+#define RWSEM_ACTIVE_MASK		0x0000ffff
+#define RWSEM_WAITING_BIAS		0xffff0000
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+#endif /* _SPARC64_RWSEM_CONST_H */
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index 82fffac5b0b81..bf2ae90ed3df8 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -15,17 +15,12 @@

#include <linux/list.h>
#include <linux/spinlock.h>
+#include <asm/rwsem-const.h>

struct rwsem_waiter;

struct rw_semaphore {
	signed int count;
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		0xffff0000
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
	spinlock_t		wait_lock;
	struct list_head	wait_list;
};
@@ -56,16 +51,16 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
	int tmp = delta;

	__asm__ __volatile__(
-	"1:\tlduw	[%2], %%g5\n\t"
-	"add		%%g5, %1, %%g7\n\t"
-	"cas		[%2], %%g5, %%g7\n\t"
-	"cmp		%%g5, %%g7\n\t"
+	"1:\tlduw	[%2], %%g1\n\t"
+	"add		%%g1, %1, %%g7\n\t"
+	"cas		[%2], %%g1, %%g7\n\t"
+	"cmp		%%g1, %%g7\n\t"
	"bne,pn		%%icc, 1b\n\t"
	" membar	#StoreLoad | #StoreStore\n\t"
	"mov		%%g7, %0\n\t"
	: "=&r" (tmp)
	: "0" (tmp), "r" (sem)
-	: "g5", "g7", "memory", "cc");
+	: "g1", "g7", "memory", "cc");

	return tmp + delta;
}
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 6ee83ff2fde36..ad78ce64d69ee 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -34,6 +34,9 @@
#define PHYS_WATCHPOINT		0x0000000000000040

#define SPITFIRE_HIGHEST_LOCKED_TLBENT	(64 - 1)
+#define CHEETAH_HIGHEST_LOCKED_TLBENT	(16 - 1)
+
+#define L1DCACHE_SIZE		0x4000

#ifndef __ASSEMBLY__

@@ -45,10 +48,6 @@ enum ultra_tlb_layout {

extern enum ultra_tlb_layout tlb_type;

-#define CHEETAH_HIGHEST_LOCKED_TLBENT	(16 - 1)
-
-#define L1DCACHE_SIZE		0x4000
-
#define sparc64_highest_locked_tlbent() \
	(tlb_type == spitfire ? \
	 SPITFIRE_HIGHEST_LOCKED_TLBENT : \
@@ -100,46 +99,6 @@ static __inline__ void spitfire_put_dsfsr(unsigned long sfsr)
			     : "r" (sfsr), "r" (TLB_SFSR), "i" (ASI_DMMU));
}

-static __inline__ unsigned long spitfire_get_primary_context(void)
-{
-	unsigned long ctx;
-
-	__asm__ __volatile__("ldxa	[%1] %2, %0"
-			     : "=r" (ctx)
-			     : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-	return ctx;
-}
-
-static __inline__ void spitfire_set_primary_context(unsigned long ctx)
-{
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-			     "membar	#Sync"
-			     : /* No outputs */
-			     : "r" (ctx & 0x3ff),
-			       "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-	__asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
-static __inline__ unsigned long spitfire_get_secondary_context(void)
-{
-	unsigned long ctx;
-
-	__asm__ __volatile__("ldxa	[%1] %2, %0"
-			     : "=r" (ctx)
-			     : "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
-	return ctx;
-}
-
-static __inline__ void spitfire_set_secondary_context(unsigned long ctx)
-{
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-			     "membar	#Sync"
-			     : /* No outputs */
-			     : "r" (ctx & 0x3ff),
-			       "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
-	__asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
/* The data cache is write through, so this just invalidates the
 * specified line.
 */
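[Editor's note] The constants moved into rwsem-const.h pack reader/writer state into one 32-bit count: each active reader adds ACTIVE_BIAS to the low half, and a writer adds WAITING_BIAS + ACTIVE_BIAS, driving the count negative while write-held. A small illustration of that arithmetic (mine, not from the patch; the signed wraparound is the same two's-complement behavior the kernel relies on):

#include <stdio.h>

#define RWSEM_ACTIVE_BIAS	0x00000001
#define RWSEM_ACTIVE_MASK	0x0000ffff
#define RWSEM_WAITING_BIAS	0xffff0000
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	signed int count = 0;			/* RWSEM_UNLOCKED_VALUE */

	count += 2 * RWSEM_ACTIVE_BIAS;		/* two readers acquired */
	printf("readers: count=%#x, active=%d\n",
	       (unsigned int)count, count & RWSEM_ACTIVE_MASK);

	count = (int)RWSEM_ACTIVE_WRITE_BIAS;	/* one writer: count < 0 */
	printf("writer:  count=%#x, negative=%d\n",
	       (unsigned int)count, count < 0);
	return 0;
}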
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index e8ba9d5277e15..3d1af3fc10a63 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -182,7 +182,7 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
	__asm__ __volatile__("wr %%g0, %0, %%asi" \
	: : "r" (__thread_flag_byte_ptr(next->thread_info)[TI_FLAG_BYTE_CURRENT_DS]));\
	__asm__ __volatile__( \
-	"mov	%%g4, %%g5\n\t" \
+	"mov	%%g4, %%g7\n\t" \
	"wrpr	%%g0, 0x95, %%pstate\n\t" \
	"stx	%%i6, [%%sp + 2047 + 0x70]\n\t" \
	"stx	%%i7, [%%sp + 2047 + 0x78]\n\t" \
@@ -207,7 +207,7 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
	"wrpr	%%g0, 0x96, %%pstate\n\t" \
	"andcc	%%o7, %6, %%g0\n\t" \
	"beq,pt	%%icc, 1f\n\t" \
-	" mov	%%g5, %0\n\t" \
+	" mov	%%g7, %0\n\t" \
	"b,a ret_from_syscall\n\t" \
	"1:\n\t" \
	: "=&r" (last) \
@@ -226,37 +226,41 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
	} \
} while(0)

-static __inline__ unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
+static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
{
+	unsigned long tmp1, tmp2;
+
	__asm__ __volatile__(
"	membar		#StoreLoad | #LoadLoad\n"
-"	mov		%0, %%g5\n"
-"1:	lduw		[%2], %%g7\n"
-"	cas		[%2], %%g7, %0\n"
-"	cmp		%%g7, %0\n"
+"	mov		%0, %1\n"
+"1:	lduw		[%4], %2\n"
+"	cas		[%4], %2, %0\n"
+"	cmp		%2, %0\n"
"	bne,a,pn	%%icc, 1b\n"
-"	 mov		%%g5, %0\n"
+"	 mov		%1, %0\n"
"	membar		#StoreLoad | #StoreStore\n"
-	: "=&r" (val)
+	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
	: "0" (val), "r" (m)
-	: "g5", "g7", "cc", "memory");
+	: "cc", "memory");
	return val;
}

-static __inline__ unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
+static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
{
+	unsigned long tmp1, tmp2;
+
	__asm__ __volatile__(
"	membar		#StoreLoad | #LoadLoad\n"
-"	mov		%0, %%g5\n"
-"1:	ldx		[%2], %%g7\n"
-"	casx		[%2], %%g7, %0\n"
-"	cmp		%%g7, %0\n"
+"	mov		%0, %1\n"
+"1:	ldx		[%4], %2\n"
+"	casx		[%4], %2, %0\n"
+"	cmp		%2, %0\n"
"	bne,a,pn	%%xcc, 1b\n"
-"	 mov		%%g5, %0\n"
+"	 mov		%1, %0\n"
"	membar		#StoreLoad | #StoreStore\n"
-	: "=&r" (val)
+	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
	: "0" (val), "r" (m)
-	: "g5", "g7", "cc", "memory");
+	: "cc", "memory");
	return val;
}
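[Editor's note] The xchg32/xchg64 rewrite stops clobbering the fixed global registers %g5 and %g7 and instead lets the compiler allocate scratch registers via the extra "=&r" outputs. The underlying algorithm is unchanged: exchange by compare-and-swap, retrying until the CAS observes the value it loaded. A portable C sketch of that loop using a GCC builtin (my illustration, not the kernel's implementation; the builtin supplies its own barriers):

#include <stdio.h>

static unsigned int my_xchg32(volatile unsigned int *m, unsigned int val)
{
	unsigned int old;

	do {
		old = *m;	/* like: lduw  [m], old */
	} while (__sync_val_compare_and_swap(m, old, val) != old);
			/* like: cas   [m], old, val ; retry if it raced */
	return old;
}

int main(void)
{
	unsigned int x = 5;

	printf("old=%u new=%u\n", my_xchg32(&x, 9), x);	/* old=5 new=9 */
	return 0;
}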
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index fa0ebf6786fc9..6bad3f20f6cb5 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -89,9 +89,7 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
	tlb_flush_mmu(mp);

	if (mp->tlb_frozen) {
-		unsigned long context = mm->context;
-
-		if (CTX_VALID(context))
+		if (CTX_VALID(mm->context))
			do_flush_tlb_mm(mm);
		mp->tlb_frozen = 0;
	} else
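[Editor's note] The tlb.h hunk follows from the mmu.h change: mm->context is now a struct, so the old copy into a plain unsigned long no longer compiles, and the new CTX_VALID() takes the struct directly. A standalone sketch of the version check it performs (types and values are illustrative, modeled on the macros shown in mmu.h above):

#include <stdio.h>

typedef struct { unsigned long sparc64_ctx_val; } mm_context_t;

#define CTX_VERSION_SHIFT	22
#define CTX_VERSION_MASK	((~0UL) << CTX_VERSION_SHIFT)

static unsigned long tlb_context_cache = (1UL << CTX_VERSION_SHIFT) + 1UL;

/* Mirrors the new CTX_VALID(): a context is stale once the global
 * version counter has moved past the one recorded in the mm.
 */
#define CTX_VALID(__ctx) \
	(!(((__ctx).sparc64_ctx_val ^ tlb_context_cache) & CTX_VERSION_MASK))

int main(void)
{
	mm_context_t ctx = { (1UL << CTX_VERSION_SHIFT) + 5UL };

	printf("valid now: %d\n", CTX_VALID(ctx));	/* 1 */
	tlb_context_cache += 1UL << CTX_VERSION_SHIFT;	/* version bump */
	printf("after bump: %d\n", CTX_VALID(ctx));	/* 0 */
	return 0;
}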