| author | David S. Miller <davem@cheetah.davemloft.net> | 2005-03-29 04:19:36 -0800 |
|---|---|---|
| committer | David S. Miller <davem@cheetah.davemloft.net> | 2005-03-29 04:19:36 -0800 |
| commit | 748e598f95e6dbb5921eb90dd896b3699e370ee3 (patch) | |
| tree | a68cec0b82aa704a21121aaf4955c9dc1281fba3 | |
| parent | 692eb311a94c3802511883fd5d2c916e585add40 (diff) | |
| parent | 27649cce2886c85b367d2e2c98144c9a78526e66 (diff) | |
| download | history-748e598f95e6dbb5921eb90dd896b3699e370ee3.tar.gz | |
Merge davem@sunset:src/BK/sparc-2.6
into cheetah.davemloft.net:/home/davem/src/BK/sparc-2.6
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | arch/sparc64/kernel/entry.S | 63 |
| -rw-r--r-- | arch/sparc64/lib/Makefile | 2 |
| -rw-r--r-- | arch/sparc64/lib/U1memcpy.S | 36 |
| -rw-r--r-- | arch/sparc64/lib/U3memcpy.S | 28 |
| -rw-r--r-- | arch/sparc64/lib/VISbzero.S | 274 |
| -rw-r--r-- | arch/sparc64/lib/VISmemset.S | 240 |
| -rw-r--r-- | arch/sparc64/lib/atomic.S | 64 |
| -rw-r--r-- | arch/sparc64/lib/bzero.S | 158 |
| -rw-r--r-- | arch/sparc64/lib/debuglocks.c | 76 |
| -rw-r--r-- | arch/sparc64/lib/dec_and_lock.S | 16 |
| -rw-r--r-- | arch/sparc64/lib/mcount.S | 18 |
| -rw-r--r-- | arch/sparc64/lib/memcmp.S | 4 |
| -rw-r--r-- | arch/sparc64/lib/memmove.S | 10 |
| -rw-r--r-- | arch/sparc64/lib/memscan.S | 32 |
| -rw-r--r-- | arch/sparc64/lib/strlen.S | 12 |
| -rw-r--r-- | arch/sparc64/lib/strlen_user.S | 12 |
| -rw-r--r-- | arch/sparc64/lib/xor.S | 46 |
| -rw-r--r-- | arch/sparc64/mm/init.c | 24 |
| -rw-r--r-- | include/asm-sparc64/system.h | 36 |
19 files changed, 409 insertions, 742 deletions
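Several of the hunks that follow (atomic.S, dec_and_lock.S, debuglocks.c, mm/init.c, asm-sparc64/system.h) rewrite the same compare-and-swap retry loop onto different scratch registers without changing its logic. As a point of reference only, and not part of the patch, a minimal C11 sketch of that retry pattern looks like this; the function name and types are illustrative:

```c
#include <stdatomic.h>

/* Hypothetical C equivalent of the cas/casx retry loops below:
 * load the current value, attempt a compare-and-swap with the
 * updated value, and retry until no other CPU raced with us. */
static int atomic_add_ret_sketch(int increment, _Atomic int *ptr)
{
	int old = atomic_load_explicit(ptr, memory_order_relaxed);

	while (!atomic_compare_exchange_weak_explicit(ptr, &old,
						      old + increment,
						      memory_order_seq_cst,
						      memory_order_relaxed))
		;	/* a failed CAS reloads 'old' for the next try */

	return old + increment;
}
```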
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 20f1da3251b01..a47f2d0b1a29b 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -252,7 +252,7 @@ do_fpdis: faddd %f0, %f2, %f4 fmuld %f0, %f2, %f6 ldxa [%g3] ASI_DMMU, %g5 -cplus_fpdis_insn_1: +cplus_fptrap_insn_1: sethi %hi(0), %g2 stxa %g2, [%g3] ASI_DMMU membar #Sync @@ -280,7 +280,9 @@ cplus_fpdis_insn_1: fzero %f34 ldxa [%g3] ASI_DMMU, %g5 add %g6, TI_FPREGS, %g1 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_2: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync add %g6, TI_FPREGS + 0x40, %g2 faddd %f32, %f34, %f36 @@ -304,9 +306,11 @@ cplus_fpdis_insn_1: 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 - mov 0x40, %g2 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_3: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync + mov 0x40, %g2 ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-( ldda [%g1 + %g2] ASI_BLK_S, %f16 add %g1, 0x80, %g1 @@ -325,24 +329,6 @@ fpdis_exit2: wr %g0, FPRS_FEF, %fprs ! clean DU/DL bits retry -cplus_fpdis_1: - sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 - - .globl cheetah_plus_patch_fpdis -cheetah_plus_patch_fpdis: - /* We configure the dTLB512_0 for 4MB pages and the - * dTLB512_1 for 8K pages when in context zero. - */ - sethi %hi(cplus_fpdis_1), %o0 - sethi %hi(cplus_fpdis_insn_1), %o2 - lduw [%o0 + %lo(cplus_fpdis_1)], %o1 - or %o2, %lo(cplus_fpdis_insn_1), %o2 - stw %o1, [%o2] - flush %o2 - - retl - nop - .align 32 fp_other_bounce: call do_fpother @@ -485,10 +471,12 @@ do_fptrap_after_fsr: rd %gsr, %g3 stx %g3, [%g6 + TI_GSR] mov SECONDARY_CONTEXT, %g3 - add %g6, TI_FPREGS, %g2 ldxa [%g3] ASI_DMMU, %g5 - stxa %g0, [%g3] ASI_DMMU +cplus_fptrap_insn_4: + sethi %hi(0), %g2 + stxa %g2, [%g3] ASI_DMMU membar #Sync + add %g6, TI_FPREGS, %g2 andcc %g1, FPRS_DL, %g0 be,pn %icc, 4f mov 0x40, %g3 @@ -506,6 +494,33 @@ do_fptrap_after_fsr: ba,pt %xcc, etrap wr %g0, 0, %fprs +cplus_fptrap_1: + sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2 + + .globl cheetah_plus_patch_fpdis +cheetah_plus_patch_fpdis: + /* We configure the dTLB512_0 for 4MB pages and the + * dTLB512_1 for 8K pages when in context zero. 
+ */ + sethi %hi(cplus_fptrap_1), %o0 + lduw [%o0 + %lo(cplus_fptrap_1)], %o1 + + set cplus_fptrap_insn_1, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_2, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_3, %o2 + stw %o1, [%o2] + flush %o2 + set cplus_fptrap_insn_4, %o2 + stw %o1, [%o2] + flush %o2 + + retl + nop + /* The registers for cross calls will be: * * DATA 0: [low 32-bits] Address of function to call, jmp to this diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 3cf408cb1695e..d58575a978d5b 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS := -Werror lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \ - VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ + bzero.o VIScsum.o VIScsumcopy.o \ VIScsumcopyusr.o VISsave.o atomic.o bitops.o \ U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index fffec2e3cef8e..ec0788236a816 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/visasm.h> #include <asm/asi.h> +#define GLOBAL_SPARE %g7 #else #define ASI_BLK_P 0xf0 #define FPRS_FEF 0x04 @@ -18,6 +19,7 @@ #define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs #define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #endif +#define GLOBAL_SPARE %g5 #endif #ifndef EX_LD @@ -123,7 +125,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, %g5 + mov %o0, %o4 cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -146,7 +148,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * of bytes to copy to make 'dst' 64-byte aligned. We pre- * subtract this from 'len'. 
*/ - sub %o0, %o1, %o4 + sub %o0, %o1, GLOBAL_SPARE sub %g2, 0x40, %g2 sub %g0, %g2, %g2 sub %o2, %g2, %o2 @@ -156,11 +158,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 0x1, %g1 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + %o4)) + EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x1, %o1 - add %o1, %o4, %o0 + add %o1, GLOBAL_SPARE, %o0 2: cmp %g2, 0x0 and %o1, 0x7, %g1 @@ -188,19 +190,19 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 3: membar #LoadStore | #StoreStore | #StoreLoad - subcc %o2, 0x40, %o4 + subcc %o2, 0x40, GLOBAL_SPARE add %o1, %g1, %g1 - andncc %o4, (0x40 - 1), %o4 + andncc GLOBAL_SPARE, (0x40 - 1), GLOBAL_SPARE srl %g1, 3, %g2 - sub %o2, %o4, %g3 + sub %o2, GLOBAL_SPARE, %g3 andn %o1, (0x40 - 1), %o1 and %g2, 7, %g2 andncc %g3, 0x7, %g3 fmovd %f0, %f2 sub %g3, 0x8, %g3 - sub %o2, %o4, %o2 + sub %o2, GLOBAL_SPARE, %o2 - add %g1, %o4, %g1 + add %g1, GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 EX_LD(LOAD_BLK(%o1, %f0)) @@ -208,7 +210,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %g3, %g1 EX_LD(LOAD_BLK(%o1, %f16)) add %o1, 0x40, %o1 - sub %o4, 0x80, %o4 + sub GLOBAL_SPARE, 0x80, GLOBAL_SPARE EX_LD(LOAD_BLK(%o1, %f32)) add %o1, 0x40, %o1 @@ -449,18 +451,18 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 2: membar #StoreLoad | #StoreStore VISExit retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 64 70: /* 16 < len <= (5 * 64) */ bne,pn %XCC, 75f sub %o0, %o1, %o3 -72: andn %o2, 0xf, %o4 +72: andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - subcc %o4, 0x10, %o4 + subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE EX_ST(STORE(stx, %o5, %o1 + %o3)) add %o1, 0x8, %o1 EX_ST(STORE(stx, %g1, %o1 + %o3)) @@ -512,10 +514,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 + andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) - subcc %o4, 0x8, %o4 + subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 @@ -544,7 +546,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 4, %o1 85: retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 32 90: EX_LD(LOAD(ldub, %o1, %g1)) @@ -553,6 +555,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/U3memcpy.S b/arch/sparc64/lib/U3memcpy.S index 8fe195a10bbad..7cae9cc6a204a 100644 --- a/arch/sparc64/lib/U3memcpy.S +++ b/arch/sparc64/lib/U3memcpy.S @@ -6,6 +6,7 @@ #ifdef __KERNEL__ #include <asm/visasm.h> #include <asm/asi.h> +#define GLOBAL_SPARE %g7 #else #define ASI_BLK_P 0xf0 #define FPRS_FEF 0x04 @@ -17,6 +18,7 @@ #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #endif +#define GLOBAL_SPARE %g5 #endif #ifndef EX_LD @@ -84,7 +86,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, %g5 + mov %o0, %o4 cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -109,7 +111,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * of bytes to copy to make 'dst' 64-byte aligned. We pre- * subtract this from 'len'. 
*/ - sub %o0, %o1, %o4 + sub %o0, %o1, GLOBAL_SPARE sub %g2, 0x40, %g2 sub %g0, %g2, %g2 sub %o2, %g2, %o2 @@ -119,11 +121,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 0x1, %g1 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + %o4)) + EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x1, %o1 - add %o1, %o4, %o0 + add %o1, GLOBAL_SPARE, %o0 2: cmp %g2, 0x0 and %o1, 0x7, %g1 @@ -149,7 +151,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 3: LOAD(prefetch, %o1 + 0x000, #one_read) LOAD(prefetch, %o1 + 0x040, #one_read) - andn %o2, (0x40 - 1), %o4 + andn %o2, (0x40 - 1), GLOBAL_SPARE LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) @@ -173,10 +175,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) - subcc %o4, 0x80, %o4 + subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 bgu,pt %XCC, 1f - srl %o4, 6, %o3 + srl GLOBAL_SPARE, 6, %o3 ba,pt %xcc, 2f nop @@ -315,9 +317,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o0, %o1, %o3 72: - andn %o2, 0xf, %o4 + andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 -1: subcc %o4, 0x10, %o4 +1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) EX_ST(STORE(stx, %o5, %o1 + %o3)) @@ -372,10 +374,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 + andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) - subcc %o4, 0x8, %o4 + subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 @@ -405,7 +407,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 4, %o1 85: retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .align 32 90: @@ -415,6 +417,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(%g5), %o0 + mov EX_RETVAL(%o4), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/VISbzero.S b/arch/sparc64/lib/VISbzero.S deleted file mode 100644 index 06b697bab974b..0000000000000 --- a/arch/sparc64/lib/VISbzero.S +++ /dev/null @@ -1,274 +0,0 @@ -/* $Id: VISbzero.S,v 1.11 2001/03/15 08:51:24 anton Exp $ - * VISbzero.S: High speed clear operations utilizing the UltraSparc - * Visual Instruction Set. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include "VIS.h" - -#ifdef __KERNEL__ -#include <asm/visasm.h> - -#define EXN(x,y,a,b,z) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: ba VISbzerofixup_ret##z; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXC(x,y,a,b,c...) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: c; \ - ba VISbzerofixup_ret0; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXO1(x,y) \ -98: x,y; \ - .section __ex_table; \ - .align 4; \ - .word 98b, VISbzerofixup_reto1; \ - .text; \ - .align 4; -#define EX(x,y,a,b) EXN(x,y,a,b,0) -#define EX1(x,y,a,b) EXN(x,y,a,b,1) -#define EX2(x,y,a,b) EXN(x,y,a,b,2) -#define EXT(start,end,handler) \ - .section __ex_table; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4 -#else -#define EX(x,y,a,b) x,y -#define EX1(x,y,a,b) x,y -#define EX2(x,y,a,b) x,y -#define EXC(x,y,a,b,c...) 
x,y -#define EXO1(x,y) x,y -#define EXT(a,b,c) -#endif - -#define ZERO_BLOCKS(base, offset, source) \ - STX source, [base - offset - 0x38] ASINORMAL; \ - STX source, [base - offset - 0x30] ASINORMAL; \ - STX source, [base - offset - 0x28] ASINORMAL; \ - STX source, [base - offset - 0x20] ASINORMAL; \ - STX source, [base - offset - 0x18] ASINORMAL; \ - STX source, [base - offset - 0x10] ASINORMAL; \ - STX source, [base - offset - 0x08] ASINORMAL; \ - STX source, [base - offset - 0x00] ASINORMAL; - -#ifdef __KERNEL__ -#define RETL clr %o0 -#else -#define RETL mov %g3, %o0 -#endif - - /* Well, bzero is a lot easier to get right than bcopy... */ -#ifdef __KERNEL__ - .section __ex_table,#alloc - .section .fixup,#alloc,#execinstr -#endif - .text - .align 32 -#ifdef __KERNEL__ - .globl __bzero, __bzero_noasi -__bzero_noasi: - rd %asi, %g5 - ba,pt %xcc, __bzero+12 - mov %g5, %o4 -__bzero: - rd %asi, %g5 - wr %g0, ASI_P, %asi ! LSU Group - mov ASI_P, %o4 -#else - .globl bzero -bzero_private: -bzero: -#ifndef REGS_64BIT - srl %o1, 0, %o1 -#endif - mov %o0, %g3 -#endif - cmp %o1, 7 - bleu,pn %xcc, 17f - andcc %o0, 3, %o2 - be,a,pt %xcc, 4f - andcc %o0, 4, %g0 - cmp %o2, 3 - be,pn %xcc, 2f - EXO1(STB %g0, [%o0 + 0x00] ASINORMAL) - cmp %o2, 2 - be,pt %xcc, 2f - EX(STB %g0, [%o0 + 0x01] ASINORMAL, sub %o1, 1) - EX(STB %g0, [%o0 + 0x02] ASINORMAL, sub %o1, 2) -2: sub %o2, 4, %o2 - sub %o0, %o2, %o0 - add %o1, %o2, %o1 - andcc %o0, 4, %g0 -4: be,pt %xcc, 2f - cmp %o1, 128 - EXO1(STW %g0, [%o0] ASINORMAL) - sub %o1, 4, %o1 - add %o0, 4, %o0 -2: blu,pn %xcc, 9f - andcc %o0, 0x38, %o2 - be,pn %icc, 6f - mov 64, %o5 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o5, %o2, %o5 - EX(STX %g0, [%o0] ASINORMAL, sub %o1, 0) - add %o0, 8, %o0 -1: andcc %o5, 16, %g0 - be,pn %icc, 1f - sub %o1, %o5, %o1 - EX1(STX %g0, [%o0] ASINORMAL, add %g0, 0) - EX1(STX %g0, [%o0 + 8] ASINORMAL, sub %g0, 8) - add %o0, 16, %o0 -1: andcc %o5, 32, %g0 - be,pn %icc, 7f - andncc %o1, 0x3f, %o3 - EX(STX %g0, [%o0] ASINORMAL, add %o1, 32) - EX(STX %g0, [%o0 + 8] ASINORMAL, add %o1, 24) - EX(STX %g0, [%o0 + 16] ASINORMAL, add %o1, 16) - EX(STX %g0, [%o0 + 24] ASINORMAL, add %o1, 8) - add %o0, 32, %o0 -6: andncc %o1, 0x3f, %o3 -7: be,pn %xcc, 9f -#ifdef __KERNEL__ - or %o4, ASI_BLK_OR, %g7 - wr %g7, %g0, %asi - VISEntryHalf -#else - wr %g0, ASI_BLK_P, %asi -#endif - membar #StoreLoad | #StoreStore | #LoadStore - fzero %f0 - andcc %o3, 0xc0, %o2 - and %o1, 0x3f, %o1 - fzero %f2 - andn %o3, 0xff, %o3 - faddd %f0, %f2, %f4 - fmuld %f0, %f2, %f6 - cmp %o2, 64 - faddd %f0, %f2, %f8 - fmuld %f0, %f2, %f10 - faddd %f0, %f2, %f12 - brz,pn %o2, 10f - fmuld %f0, %f2, %f14 - be,pn %icc, 2f - EXC(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o2, add %o2, %o1, %o2) - cmp %o2, 128 - be,pn %icc, 2f - EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 64, %o2) - EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 128, %o2) -2: brz,pn %o3, 12f - add %o0, %o2, %o0 -10: EX(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o1) - EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o1, sub %o1, 64, %o1) - EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o1, sub %o1, 128, %o1) - EXC(STBLK %f0, [%o0 + 0xc0] ASIBLK, add %o3, %o1, sub %o1, 192, %o1) -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: -#ifdef __KERNEL__ - VISExitHalf - wr %o4, 0x0, %asi -#else -#ifndef REGS_64BIT - wr %g0, FPRS_FEF, %fprs -#endif -#endif - membar #StoreLoad | #StoreStore -9: andcc %o1, 0xf8, %o2 - be,pn %xcc, 13f - andcc %o1, 7, %o1 
-#ifdef __KERNEL__ -14: sethi %hi(13f), %o4 - srl %o2, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + %lo(13f), %g0 - add %o0, %o2, %o0 -#else -14: rd %pc, %o4 - srl %o2, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + (13f - 14b), %g0 - add %o0, %o2, %o0 -#endif -12: ZERO_BLOCKS(%o0, 0xc8, %g0) - ZERO_BLOCKS(%o0, 0x88, %g0) - ZERO_BLOCKS(%o0, 0x48, %g0) - ZERO_BLOCKS(%o0, 0x08, %g0) - EXT(12b,13f,VISbzerofixup_zb) -13: be,pn %xcc, 8f - andcc %o1, 4, %g0 - be,pn %xcc, 1f - andcc %o1, 2, %g0 - EX(STW %g0, [%o0] ASINORMAL, and %o1, 7) - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o1, 1, %g0 - EX(STH %g0, [%o0] ASINORMAL, and %o1, 3) - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - EX(STB %g0, [%o0] ASINORMAL, add %g0, 1) -8: -#ifdef __KERNEL__ - wr %g5, %g0, %asi -#endif - retl - RETL -17: be,pn %xcc, 13b - orcc %o1, 0, %g0 - be,pn %xcc, 0f -8: add %o0, 1, %o0 - subcc %o1, 1, %o1 - bne,pt %xcc, 8b - EX(STB %g0, [%o0 - 1] ASINORMAL, add %o1, 1) -0: -#ifdef __KERNEL__ - wr %g5, %g0, %asi -#endif - retl - RETL - -#ifdef __KERNEL__ - .section .fixup - .align 4 -VISbzerofixup_reto1: - mov %o1, %o0 -VISbzerofixup_ret0: - wr %g5, %g0, %asi - retl - wr %g0, 0, %fprs -VISbzerofixup_ret1: - and %o5, 0x30, %o5 - add %o5, %o1, %o5 - ba,pt %xcc, VISbzerofixup_ret0 - add %o0, %o5, %o0 -VISbzerofixup_ret2: - and %o5, 0x20, %o5 - add %o5, %o1, %o5 - ba,pt %xcc, VISbzerofixup_ret0 - add %o0, %o5, %o0 -VISbzerofixup_zb: - andcc %o1, 7, %o1 - sll %g2, 3, %g2 - add %o1, 256, %o1 - ba,pt %xcc, VISbzerofixup_ret0 - sub %o1, %g2, %o0 -#endif diff --git a/arch/sparc64/lib/VISmemset.S b/arch/sparc64/lib/VISmemset.S deleted file mode 100644 index 152723a490141..0000000000000 --- a/arch/sparc64/lib/VISmemset.S +++ /dev/null @@ -1,240 +0,0 @@ -/* $Id: VISmemset.S,v 1.10 1999/12/23 17:02:16 jj Exp $ - * VISmemset.S: High speed memset operations utilizing the UltraSparc - * Visual Instruction Set. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include "VIS.h" - -#ifdef REGS_64BIT -#define SET_BLOCKS(base, offset, source) \ - stx source, [base - offset - 0x18]; \ - stx source, [base - offset - 0x10]; \ - stx source, [base - offset - 0x08]; \ - stx source, [base - offset - 0x00]; -#else -#define SET_BLOCKS(base, offset, source) \ - stw source, [base - offset - 0x18]; \ - stw source, [base - offset - 0x14]; \ - stw source, [base - offset - 0x10]; \ - stw source, [base - offset - 0x0c]; \ - stw source, [base - offset - 0x08]; \ - stw source, [base - offset - 0x04]; \ - stw source, [base - offset - 0x00]; \ - stw source, [base - offset + 0x04]; -#endif - -#ifndef __KERNEL__ -/* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */ -#include "VISbzero.S" -#endif - -#ifdef __KERNEL__ -#include <asm/visasm.h> -#endif - - /* Well, memset is a lot easier to get right than bcopy... 
*/ - .text - .align 32 -#ifdef __KERNEL__ - .globl __memset -__memset: -#endif - .globl memset -memset: -#ifndef __KERNEL__ - brz,a,pt %o1, bzero_private - mov %o2, %o1 -#ifndef REGS_64BIT - srl %o2, 0, %o2 -#endif -#endif - mov %o0, %o4 - cmp %o2, 7 - bleu,pn %xcc, 17f - andcc %o0, 3, %g5 - be,pt %xcc, 4f - and %o1, 0xff, %o1 - cmp %g5, 3 - be,pn %xcc, 2f - stb %o1, [%o0 + 0x00] - cmp %g5, 2 - be,pt %xcc, 2f - stb %o1, [%o0 + 0x01] - stb %o1, [%o0 + 0x02] -2: sub %g5, 4, %g5 - sub %o0, %g5, %o0 - add %o2, %g5, %o2 -4: sllx %o1, 8, %g1 - andcc %o0, 4, %g0 - or %o1, %g1, %o1 - sllx %o1, 16, %g1 - or %o1, %g1, %o1 - be,pt %xcc, 2f -#ifdef REGS_64BIT - sllx %o1, 32, %g1 -#else - cmp %o2, 128 -#endif - stw %o1, [%o0] - sub %o2, 4, %o2 - add %o0, 4, %o0 -2: -#ifdef REGS_64BIT - cmp %o2, 128 - or %o1, %g1, %o1 -#endif - blu,pn %xcc, 9f - andcc %o0, 0x38, %g5 - be,pn %icc, 6f - mov 64, %o5 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o5, %g5, %o5 -#ifdef REGS_64BIT - stx %o1, [%o0] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] -#endif - add %o0, 8, %o0 -1: andcc %o5, 16, %g0 - be,pn %icc, 1f - sub %o2, %o5, %o2 -#ifdef REGS_64BIT - stx %o1, [%o0] - stx %o1, [%o0 + 8] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] - stw %o1, [%o0 + 8] - stw %o1, [%o0 + 12] -#endif - add %o0, 16, %o0 -1: andcc %o5, 32, %g0 - be,pn %icc, 7f - andncc %o2, 0x3f, %o3 -#ifdef REGS_64BIT - stx %o1, [%o0] - stx %o1, [%o0 + 8] - stx %o1, [%o0 + 16] - stx %o1, [%o0 + 24] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] - stw %o1, [%o0 + 8] - stw %o1, [%o0 + 12] - stw %o1, [%o0 + 16] - stw %o1, [%o0 + 20] - stw %o1, [%o0 + 24] - stw %o1, [%o0 + 28] -#endif - add %o0, 32, %o0 -7: be,pn %xcc, 9f - nop -#ifdef __KERNEL__ - VISEntryHalf -#endif - ldd [%o0 - 8], %f0 -18: rd %asi, %g2 - wr %g0, ASI_BLK_P, %asi - membar #StoreStore | #LoadStore - andcc %o3, 0xc0, %g5 - and %o2, 0x3f, %o2 - fmovd %f0, %f2 - fmovd %f0, %f4 - andn %o3, 0xff, %o3 - fmovd %f0, %f6 - cmp %g5, 64 - fmovd %f0, %f8 - fmovd %f0, %f10 - fmovd %f0, %f12 - brz,pn %g5, 10f - fmovd %f0, %f14 - be,pn %icc, 2f - stda %f0, [%o0 + 0x00] %asi - cmp %g5, 128 - be,pn %icc, 2f - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi -2: brz,pn %o3, 12f - add %o0, %g5, %o0 -10: stda %f0, [%o0 + 0x00] %asi - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi - stda %f0, [%o0 + 0xc0] %asi -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: -#ifdef __KERNEL__ - wr %g2, %g0, %asi - VISExitHalf -#else -#ifndef REGS_64BIT - wr %g0, FPRS_FEF, %fprs -#endif -#endif - membar #StoreLoad | #StoreStore -9: andcc %o2, 0x78, %g5 - be,pn %xcc, 13f - andcc %o2, 7, %o2 -#ifdef __KERNEL__ -14: srl %g5, 1, %o3 - sethi %hi(13f), %g3 - sub %g3, %o3, %g3 - jmpl %g3 + %lo(13f), %g0 - add %o0, %g5, %o0 -#else -14: rd %pc, %g3 -#ifdef REGS_64BIT - srl %g5, 1, %o3 - sub %g3, %o3, %g3 -#else - sub %g3, %g5, %g3 -#endif - jmpl %g3 + (13f - 14b), %g0 - add %o0, %g5, %o0 -#endif -12: SET_BLOCKS(%o0, 0x68, %o1) - SET_BLOCKS(%o0, 0x48, %o1) - SET_BLOCKS(%o0, 0x28, %o1) - SET_BLOCKS(%o0, 0x08, %o1) -13: be,pn %xcc, 8f - andcc %o2, 4, %g0 - be,pn %xcc, 1f - andcc %o2, 2, %g0 - stw %o1, [%o0] - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o2, 1, %g0 - sth %o1, [%o0] - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - stb %o1, [%o0] -8: retl - mov %o4, %o0 -17: brz,pn %o2, 0f -8: add %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 8b - stb %o1, [%o0 - 1] -0: retl - mov %o4, %o0 -6: -#ifdef REGS_64BIT - stx %o1, [%o0] -#else - stw %o1, [%o0] - stw %o1, [%o0 + 4] -#endif - andncc %o2, 0x3f, %o3 - be,pn 
%xcc, 9b - nop -#ifdef __KERNEL__ - VISEntryHalf -#endif - ba,pt %xcc, 18b - ldd [%o0], %f0 diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index 41be4131f8008..e528b8d1a3e69 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S @@ -29,10 +29,10 @@ .globl atomic_add .type atomic_add,#function atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ -1: lduw [%o1], %g5 - add %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b nop retl @@ -42,10 +42,10 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ .globl atomic_sub .type atomic_sub,#function atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ -1: lduw [%o1], %g5 - sub %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b nop retl @@ -56,10 +56,10 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: lduw [%o1], %g5 - add %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b add %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -71,10 +71,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ .type atomic_sub_ret,#function atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: lduw [%o1], %g5 - sub %g5, %o0, %g7 - cas [%o1], %g5, %g7 - cmp %g5, %g7 +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %icc, 1b sub %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -85,10 +85,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ .globl atomic64_add .type atomic64_add,#function atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ -1: ldx [%o1], %g5 - add %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + add %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b nop retl @@ -98,10 +98,10 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ .globl atomic64_sub .type atomic64_sub,#function atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ -1: ldx [%o1], %g5 - sub %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + sub %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b nop retl @@ -112,10 +112,10 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ .type atomic64_add_ret,#function atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: ldx [%o1], %g5 - add %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + add %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b add %g7, %o0, %g7 ATOMIC_POST_BARRIER @@ -127,10 +127,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ .type atomic64_sub_ret,#function atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ ATOMIC_PRE_BARRIER -1: ldx [%o1], %g5 - sub %g5, %o0, %g7 - casx [%o1], %g5, %g7 - cmp %g5, %g7 +1: ldx [%o1], %g1 + sub %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 bne,pn %xcc, 1b sub %g7, %o0, %g7 ATOMIC_POST_BARRIER diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S new file mode 100644 index 0000000000000..21a933ffb7c29 --- /dev/null +++ b/arch/sparc64/lib/bzero.S @@ -0,0 +1,158 @@ +/* bzero.S: Simple prefetching memset, bzero, and clear_user + * implementations. + * + * Copyright (C) 2005 David S. 
Miller <davem@davemloft.net> + */ + + .text + + .globl __memset + .type __memset, #function +__memset: /* %o0=buf, %o1=pat, %o2=len */ + + .globl memset + .type memset, #function +memset: /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %xcc, 1f + or %g1, %o2, %o2 + + .globl __bzero + .type __bzero, #function +__bzero: /* %o0=buf, %o1=len */ + clr %o2 +1: mov %o0, %o3 + brz,pn %o1, __bzero_done + cmp %o1, 16 + bl,pn %icc, __bzero_tiny + prefetch [%o0 + 0x000], #n_writes + andcc %o0, 0x3, %g0 + be,pt %icc, 2f +1: stb %o2, [%o0 + 0x00] + add %o0, 1, %o0 + andcc %o0, 0x3, %g0 + bne,pn %icc, 1b + sub %o1, 1, %o1 +2: andcc %o0, 0x7, %g0 + be,pt %icc, 3f + stw %o2, [%o0 + 0x00] + sub %o1, 4, %o1 + add %o0, 4, %o0 +3: and %o1, 0x38, %g1 + cmp %o1, 0x40 + andn %o1, 0x3f, %o4 + bl,pn %icc, 5f + and %o1, 0x7, %o1 + prefetch [%o0 + 0x040], #n_writes + prefetch [%o0 + 0x080], #n_writes + prefetch [%o0 + 0x0c0], #n_writes + prefetch [%o0 + 0x100], #n_writes + prefetch [%o0 + 0x140], #n_writes +4: prefetch [%o0 + 0x180], #n_writes + stx %o2, [%o0 + 0x00] + stx %o2, [%o0 + 0x08] + stx %o2, [%o0 + 0x10] + stx %o2, [%o0 + 0x18] + stx %o2, [%o0 + 0x20] + stx %o2, [%o0 + 0x28] + stx %o2, [%o0 + 0x30] + stx %o2, [%o0 + 0x38] + subcc %o4, 0x40, %o4 + bne,pt %icc, 4b + add %o0, 0x40, %o0 + brz,pn %g1, 6f + nop +5: stx %o2, [%o0 + 0x00] + subcc %g1, 8, %g1 + bne,pt %icc, 5b + add %o0, 0x8, %o0 +6: brz,pt %o1, __bzero_done + nop +__bzero_tiny: +1: stb %o2, [%o0 + 0x00] + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 +__bzero_done: + retl + mov %o3, %o0 + .size __bzero, .-__bzero + .size __memset, .-__memset + .size memset, .-memset + +#define EX_ST(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov %o1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .globl __bzero_noasi + .type __bzero_noasi, #function +__bzero_noasi: /* %o0=buf, %o1=len */ + brz,pn %o1, __bzero_noasi_done + cmp %o1, 16 + bl,pn %icc, __bzero_noasi_tiny + EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) + andcc %o0, 0x3, %g0 + be,pt %icc, 2f +1: EX_ST(stba %g0, [%o0 + 0x00] %asi) + add %o0, 1, %o0 + andcc %o0, 0x3, %g0 + bne,pn %icc, 1b + sub %o1, 1, %o1 +2: andcc %o0, 0x7, %g0 + be,pt %icc, 3f + EX_ST(stwa %g0, [%o0 + 0x00] %asi) + sub %o1, 4, %o1 + add %o0, 4, %o0 +3: and %o1, 0x38, %g1 + cmp %o1, 0x40 + andn %o1, 0x3f, %o4 + bl,pn %icc, 5f + and %o1, 0x7, %o1 + EX_ST(prefetcha [%o0 + 0x040] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x080] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x0c0] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x100] %asi, #n_writes) + EX_ST(prefetcha [%o0 + 0x140] %asi, #n_writes) +4: EX_ST(prefetcha [%o0 + 0x180] %asi, #n_writes) + EX_ST(stxa %g0, [%o0 + 0x00] %asi) + EX_ST(stxa %g0, [%o0 + 0x08] %asi) + EX_ST(stxa %g0, [%o0 + 0x10] %asi) + EX_ST(stxa %g0, [%o0 + 0x18] %asi) + EX_ST(stxa %g0, [%o0 + 0x20] %asi) + EX_ST(stxa %g0, [%o0 + 0x28] %asi) + EX_ST(stxa %g0, [%o0 + 0x30] %asi) + EX_ST(stxa %g0, [%o0 + 0x38] %asi) + subcc %o4, 0x40, %o4 + bne,pt %icc, 4b + add %o0, 0x40, %o0 + brz,pn %g1, 6f + nop +5: EX_ST(stxa %g0, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %icc, 5b + add %o0, 0x8, %o0 +6: brz,pt %o1, __bzero_noasi_done + nop +__bzero_noasi_tiny: +1: EX_ST(stba %g0, [%o0 + 0x00] %asi) + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 +__bzero_noasi_done: + retl + clr %o0 + .size __bzero_noasi, .-__bzero_noasi 
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index 46e5ebfb4b7ce..c421e0c653253 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -138,15 +138,15 @@ wlock_again: } /* Try once to increment the counter. */ __asm__ __volatile__( -" ldx [%0], %%g5\n" -" brlz,a,pn %%g5, 2f\n" +" ldx [%0], %%g1\n" +" brlz,a,pn %%g1, 2f\n" " mov 1, %0\n" -" add %%g5, 1, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" sub %%g5, %%g7, %0\n" +" add %%g1, 1, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" sub %%g1, %%g7, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g5", "g7", "memory"); + : "g1", "g7", "memory"); membar("#StoreLoad | #StoreStore"); if (val) goto wlock_again; @@ -173,14 +173,14 @@ runlock_again: /* Spin trying to decrement the counter using casx. */ __asm__ __volatile__( " membar #StoreLoad | #LoadLoad\n" -" ldx [%0], %%g5\n" -" sub %%g5, 1, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" sub %%g1, 1, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" : "=r" (val) : "0" (&(rw->lock)) - : "g5", "g7", "memory"); + : "g1", "g7", "memory"); if (val) { if (!--stuck) { if (shown++ <= 2) @@ -216,17 +216,17 @@ wlock_again: __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" brlz,pn %%g5, 1f\n" -" or %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" brlz,pn %%g1, 1f\n" +" or %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" " ba,pt %%xcc, 2f\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" "1: mov 1, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { /* We couldn't get the write bit. 
*/ if (!--stuck) { @@ -248,15 +248,15 @@ wlock_again: __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -"1: ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%xcc, 1b\n" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (&(rw->lock)) - : "g3", "g5", "g7", "cc", "memory"); + : "g3", "g1", "g7", "cc", "memory"); while(rw->lock != 0) { if (!--stuck) { if (shown++ <= 2) @@ -294,14 +294,14 @@ wlock_again: " membar #StoreLoad | #LoadLoad\n" " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { if (!--stuck) { if (shown++ <= 2) @@ -323,17 +323,17 @@ int _do_write_trylock (rwlock_t *rw, char *str) __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -" ldx [%0], %%g5\n" -" brlz,pn %%g5, 1f\n" -" or %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" +" ldx [%0], %%g1\n" +" brlz,pn %%g1, 1f\n" +" or %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" " membar #StoreLoad | #StoreStore\n" " ba,pt %%xcc, 2f\n" -" sub %%g5, %%g7, %0\n" +" sub %%g1, %%g7, %0\n" "1: mov 1, %0\n" "2:" : "=r" (val) : "0" (&(rw->lock)) - : "g3", "g5", "g7", "memory"); + : "g3", "g1", "g7", "memory"); if (val) { put_cpu(); @@ -347,15 +347,15 @@ int _do_write_trylock (rwlock_t *rw, char *str) __asm__ __volatile__( " mov 1, %%g3\n" " sllx %%g3, 63, %%g3\n" -"1: ldx [%0], %%g5\n" -" andn %%g5, %%g3, %%g7\n" -" casx [%0], %%g5, %%g7\n" -" cmp %%g5, %%g7\n" +"1: ldx [%0], %%g1\n" +" andn %%g1, %%g3, %%g7\n" +" casx [%0], %%g1, %%g7\n" +" cmp %%g1, %%g7\n" " bne,pn %%xcc, 1b\n" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (&(rw->lock)) - : "g3", "g5", "g7", "cc", "memory"); + : "g3", "g1", "g7", "cc", "memory"); put_cpu(); diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index e86906744cf6f..7e6fdaebedbab 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S @@ -27,12 +27,12 @@ .globl _atomic_dec_and_lock _atomic_dec_and_lock: /* %o0 = counter, %o1 = lock */ -loop1: lduw [%o0], %g5 - subcc %g5, 1, %g7 +loop1: lduw [%o0], %g2 + subcc %g2, 1, %g7 be,pn %icc, start_to_zero nop -nzero: cas [%o0], %g5, %g7 - cmp %g5, %g7 +nzero: cas [%o0], %g2, %g7 + cmp %g2, %g7 bne,pn %icc, loop1 mov 0, %g1 @@ -50,13 +50,13 @@ to_zero: ldstub [%o1], %g3 brnz,pn %g3, spin_on_lock membar #StoreLoad | #StoreStore -loop2: cas [%o0], %g5, %g7 /* ASSERT(g7 == 0) */ - cmp %g5, %g7 +loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ + cmp %g2, %g7 be,pt %icc, out mov 1, %g1 - lduw [%o0], %g5 - subcc %g5, 1, %g7 + lduw [%o0], %g2 + subcc %g2, 1, %g7 be,pn %icc, loop2 nop membar #StoreStore | #LoadStore diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S index 4e8c7928c49f1..2ef2e268bdcfd 100644 --- a/arch/sparc64/lib/mcount.S +++ b/arch/sparc64/lib/mcount.S @@ -38,22 +38,22 @@ _mcount: * Check whether %sp is dangerously low. */ ldub [%g6 + TI_FPDEPTH], %g1 - srl %g1, 1, %g5 - add %g5, 1, %g5 - sllx %g5, 8, %g5 ! each fpregs frame is 256b - add %g5, 192, %g5 - add %g6, %g5, %g5 ! where does task_struct+frame end? 
- sub %g5, STACK_BIAS, %g5 - cmp %sp, %g5 + srl %g1, 1, %g3 + add %g3, 1, %g3 + sllx %g3, 8, %g3 ! each fpregs frame is 256b + add %g3, 192, %g3 + add %g6, %g3, %g3 ! where does task_struct+frame end? + sub %g3, STACK_BIAS, %g3 + cmp %sp, %g3 bg,pt %xcc, 1f - sethi %hi(panicstring), %g5 + sethi %hi(panicstring), %g3 sethi %hi(ovstack), %g7 ! cant move to panic stack fast enough or %g7, %lo(ovstack), %g7 add %g7, OVSTACKSIZE, %g7 sub %g7, STACK_BIAS, %g7 mov %g7, %sp call prom_printf - or %g5, %lo(panicstring), %o0 + or %g3, %lo(panicstring), %o0 call prom_halt nop #endif diff --git a/arch/sparc64/lib/memcmp.S b/arch/sparc64/lib/memcmp.S index d34dc3d874dae..c90ad96c51b9c 100644 --- a/arch/sparc64/lib/memcmp.S +++ b/arch/sparc64/lib/memcmp.S @@ -13,12 +13,12 @@ memcmp: cmp %o2, 0 ! IEU1 Group loop: be,pn %icc, ret_0 ! CTI nop ! IEU0 - ldub [%o0], %g5 ! LSU Group + ldub [%o0], %g7 ! LSU Group ldub [%o1], %g3 ! LSU Group sub %o2, 1, %o2 ! IEU0 add %o0, 1, %o0 ! IEU1 add %o1, 1, %o1 ! IEU0 Group - subcc %g5, %g3, %g3 ! IEU1 Group + subcc %g7, %g3, %g3 ! IEU1 Group be,pt %icc, loop ! CTI cmp %o2, 0 ! IEU1 Group diff --git a/arch/sparc64/lib/memmove.S b/arch/sparc64/lib/memmove.S index 1c1ebbbdf830e..97395802c23c4 100644 --- a/arch/sparc64/lib/memmove.S +++ b/arch/sparc64/lib/memmove.S @@ -12,17 +12,17 @@ memmove: /* o0=dst o1=src o2=len */ mov %o0, %g1 cmp %o0, %o1 bleu,pt %xcc, memcpy - add %o1, %o2, %g5 - cmp %g5, %o0 + add %o1, %o2, %g7 + cmp %g7, %o0 bleu,pt %xcc, memcpy add %o0, %o2, %o5 - sub %g5, 1, %o1 + sub %g7, 1, %o1 sub %o5, 1, %o0 -1: ldub [%o1], %g5 +1: ldub [%o1], %g7 subcc %o2, 1, %o2 sub %o1, 1, %o1 - stb %g5, [%o0] + stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S index a34c6b9d21e85..5e72d49114179 100644 --- a/arch/sparc64/lib/memscan.S +++ b/arch/sparc64/lib/memscan.S @@ -52,43 +52,43 @@ check_bytes: andcc %o5, 0xff, %g0 add %o0, -5, %g2 ba,pt %xcc, 3f - srlx %o5, 32, %g5 + srlx %o5, 32, %g7 -2: srlx %o5, 8, %g5 +2: srlx %o5, 8, %g7 be,pn %icc, 1f add %o0, -8, %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 + andcc %g7, 0xff, %g0 - srlx %g5, 8, %g5 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 andcc %g3, %o3, %g0 be,a,pn %icc, 2f mov %o0, %g2 -3: andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 +3: andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 - andcc %g5, 0xff, %g0 - srlx %g5, 8, %g5 + andcc %g7, 0xff, %g0 + srlx %g7, 8, %g7 be,pn %icc, 1f inc %g2 diff --git a/arch/sparc64/lib/strlen.S b/arch/sparc64/lib/strlen.S index 066ec1ed7d0dd..e9ba1920d818e 100644 --- a/arch/sparc64/lib/strlen.S +++ b/arch/sparc64/lib/strlen.S @@ -48,16 +48,16 @@ strlen: add %o0, 4, %o0 /* Check every byte. 
*/ - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 24, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 16, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 8, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 andcc %o5, 0xff, %g0 diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S index 4af69a0adfbcc..9ed54ba14fc63 100644 --- a/arch/sparc64/lib/strlen_user.S +++ b/arch/sparc64/lib/strlen_user.S @@ -54,16 +54,16 @@ __strnlen_user: ba,a,pt %xcc, 1f /* Check every byte. */ -82: srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 +82: srl %o5, 24, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o0, -3, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 16, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 + srl %o5, 8, %g7 + andcc %g7, 0xff, %g0 be,pn %icc, 1f add %o4, 1, %o4 andcc %o5, 0xff, %g0 diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S index f748fd6bbc389..4cd5d2be1ae1f 100644 --- a/arch/sparc64/lib/xor.S +++ b/arch/sparc64/lib/xor.S @@ -248,7 +248,7 @@ xor_vis_4: .globl xor_vis_5 .type xor_vis_5,#function xor_vis_5: - mov %o5, %g5 + save %sp, -192, %sp rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 be,pt %icc, 0f @@ -256,61 +256,60 @@ xor_vis_5: jmpl %g1 + %lo(VISenter), %g7 add %g7, 8, %g7 0: wr %g0, FPRS_FEF, %fprs - mov %g5, %o5 rd %asi, %g1 wr %g0, ASI_BLK_P, %asi membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 64, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 + sub %i0, 64, %i0 + ldda [%i1] %asi, %f0 + ldda [%i2] %asi, %f16 -5: ldda [%o3] %asi, %f32 +5: ldda [%i3] %asi, %f32 fxor %f0, %f16, %f48 fxor %f2, %f18, %f50 - add %o1, 64, %o1 + add %i1, 64, %i1 fxor %f4, %f20, %f52 fxor %f6, %f22, %f54 - add %o2, 64, %o2 + add %i2, 64, %i2 fxor %f8, %f24, %f56 fxor %f10, %f26, %f58 fxor %f12, %f28, %f60 fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 + ldda [%i4] %asi, %f16 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 fxor %f54, %f38, %f54 - add %o3, 64, %o3 + add %i3, 64, %i3 fxor %f56, %f40, %f56 fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 + ldda [%i5] %asi, %f32 fxor %f48, %f16, %f48 fxor %f50, %f18, %f50 - add %o4, 64, %o4 + add %i4, 64, %i4 fxor %f52, %f20, %f52 fxor %f54, %f22, %f54 - add %o5, 64, %o5 + add %i5, 64, %i5 fxor %f56, %f24, %f56 fxor %f58, %f26, %f58 fxor %f60, %f28, %f60 fxor %f62, %f30, %f62 - ldda [%o1] %asi, %f0 + ldda [%i1] %asi, %f0 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 fxor %f54, %f38, %f54 fxor %f56, %f40, %f56 fxor %f58, %f42, %f58 - subcc %o0, 64, %o0 + subcc %i0, 64, %i0 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - stda %f48, [%o1 - 64] %asi + stda %f48, [%i1 - 64] %asi bne,pt %xcc, 5b - ldda [%o2] %asi, %f16 + ldda [%i2] %asi, %f16 - ldda [%o3] %asi, %f32 + ldda [%i3] %asi, %f32 fxor %f0, %f16, %f48 fxor %f2, %f18, %f50 fxor %f4, %f20, %f52 @@ -319,7 +318,7 @@ xor_vis_5: fxor %f10, %f26, %f58 fxor %f12, %f28, %f60 fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 + ldda [%i4] %asi, %f16 fxor %f48, %f32, %f48 fxor %f50, %f34, %f50 fxor %f52, %f36, %f52 @@ -328,7 +327,7 @@ xor_vis_5: fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 + ldda [%i5] %asi, %f32 fxor %f48, %f16, %f48 fxor %f50, %f18, %f50 fxor %f52, %f20, %f52 @@ -346,9 +345,10 @@ xor_vis_5: fxor %f58, %f42, %f58 fxor %f60, %f44, %f60 fxor %f62, 
%f46, %f62 - stda %f48, [%o1] %asi + stda %f48, [%i1] %asi membar #Sync|#StoreStore|#StoreLoad wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs + wr %g0, 0, %fprs + ret + restore .size xor_vis_5, .-xor_vis_5 diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 4c97fbe0bb7b7..89022ccaa75bb 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -132,15 +132,15 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) mask = (mask << 24) | (1UL << PG_dcache_dirty); __asm__ __volatile__("1:\n\t" "ldx [%2], %%g7\n\t" - "and %%g7, %1, %%g5\n\t" - "or %%g5, %0, %%g5\n\t" - "casx [%2], %%g7, %%g5\n\t" - "cmp %%g7, %%g5\n\t" + "and %%g7, %1, %%g1\n\t" + "or %%g1, %0, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" "bne,pn %%xcc, 1b\n\t" " membar #StoreLoad | #StoreStore" : /* no outputs */ : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) - : "g5", "g7"); + : "g1", "g7"); } static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu) @@ -150,20 +150,20 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c __asm__ __volatile__("! test_and_clear_dcache_dirty\n" "1:\n\t" "ldx [%2], %%g7\n\t" - "srlx %%g7, 24, %%g5\n\t" - "and %%g5, %3, %%g5\n\t" - "cmp %%g5, %0\n\t" + "srlx %%g7, 24, %%g1\n\t" + "and %%g1, %3, %%g1\n\t" + "cmp %%g1, %0\n\t" "bne,pn %%icc, 2f\n\t" - " andn %%g7, %1, %%g5\n\t" - "casx [%2], %%g7, %%g5\n\t" - "cmp %%g7, %%g5\n\t" + " andn %%g7, %1, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" "bne,pn %%xcc, 1b\n\t" " membar #StoreLoad | #StoreStore\n" "2:" : /* no outputs */ : "r" (cpu), "r" (mask), "r" (&page->flags), "i" (NR_CPUS - 1UL) - : "g5", "g7"); + : "g1", "g7"); } extern void __update_mmu_cache(unsigned long mmu_context_hw, unsigned long address, pte_t pte, int code); diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index 3077046a268d5..3d1af3fc10a63 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -226,37 +226,41 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ } \ } while(0) -static __inline__ unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) +static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) { + unsigned long tmp1, tmp2; + __asm__ __volatile__( " membar #StoreLoad | #LoadLoad\n" -" mov %0, %%g5\n" -"1: lduw [%2], %%g7\n" -" cas [%2], %%g7, %0\n" -" cmp %%g7, %0\n" +" mov %0, %1\n" +"1: lduw [%4], %2\n" +" cas [%4], %2, %0\n" +" cmp %2, %0\n" " bne,a,pn %%icc, 1b\n" -" mov %%g5, %0\n" +" mov %1, %0\n" " membar #StoreLoad | #StoreStore\n" - : "=&r" (val) + : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2) : "0" (val), "r" (m) - : "g5", "g7", "cc", "memory"); + : "cc", "memory"); return val; } -static __inline__ unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val) +static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val) { + unsigned long tmp1, tmp2; + __asm__ __volatile__( " membar #StoreLoad | #LoadLoad\n" -" mov %0, %%g5\n" -"1: ldx [%2], %%g7\n" -" casx [%2], %%g7, %0\n" -" cmp %%g7, %0\n" +" mov %0, %1\n" +"1: ldx [%4], %2\n" +" casx [%4], %2, %0\n" +" cmp %2, %0\n" " bne,a,pn %%xcc, 1b\n" -" mov %%g5, %0\n" +" mov %1, %0\n" " membar #StoreLoad | #StoreStore\n" - : "=&r" (val) + : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2) : "0" (val), "r" (m) - : "g5", "g7", "cc", "memory"); + : "cc", "memory"); return val; } |
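The new arch/sparc64/lib/bzero.S above replaces the VIS block-store routines with a plain store loop that issues prefetches ahead of the write stream. A rough C sketch of that loop shape, not taken from the patch beyond the 64-byte stride and the 0x180-byte prefetch-ahead distance visible in the assembly, is:

```c
#include <stddef.h>
#include <stdint.h>

/* Sketch only: assumes 'buf' is 8-byte aligned and 'len' is a
 * multiple of 64, which the assembly arranges before entering
 * its main loop. */
static void clear_with_prefetch(uint64_t *buf, size_t len)
{
	while (len >= 64) {
		/* prefetch for write, well ahead of the stores */
		__builtin_prefetch((char *)buf + 0x180, 1);
		buf[0] = 0; buf[1] = 0; buf[2] = 0; buf[3] = 0;
		buf[4] = 0; buf[5] = 0; buf[6] = 0; buf[7] = 0;
		buf += 8;
		len -= 64;
	}
}
```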