author    David S. Miller <davem@cheetah.davemloft.net>  2005-03-29 04:19:36 -0800
committer David S. Miller <davem@cheetah.davemloft.net>  2005-03-29 04:19:36 -0800
commit    748e598f95e6dbb5921eb90dd896b3699e370ee3 (patch)
tree      a68cec0b82aa704a21121aaf4955c9dc1281fba3
parent    692eb311a94c3802511883fd5d2c916e585add40 (diff)
parent    27649cce2886c85b367d2e2c98144c9a78526e66 (diff)
download  history-748e598f95e6dbb5921eb90dd896b3699e370ee3.tar.gz
Merge davem@sunset:src/BK/sparc-2.6
into cheetah.davemloft.net:/home/davem/src/BK/sparc-2.6
-rw-r--r--  arch/sparc64/kernel/entry.S     |  63
-rw-r--r--  arch/sparc64/lib/Makefile       |   2
-rw-r--r--  arch/sparc64/lib/U1memcpy.S     |  36
-rw-r--r--  arch/sparc64/lib/U3memcpy.S     |  28
-rw-r--r--  arch/sparc64/lib/VISbzero.S     | 274
-rw-r--r--  arch/sparc64/lib/VISmemset.S    | 240
-rw-r--r--  arch/sparc64/lib/atomic.S       |  64
-rw-r--r--  arch/sparc64/lib/bzero.S        | 158
-rw-r--r--  arch/sparc64/lib/debuglocks.c   |  76
-rw-r--r--  arch/sparc64/lib/dec_and_lock.S |  16
-rw-r--r--  arch/sparc64/lib/mcount.S       |  18
-rw-r--r--  arch/sparc64/lib/memcmp.S       |   4
-rw-r--r--  arch/sparc64/lib/memmove.S      |  10
-rw-r--r--  arch/sparc64/lib/memscan.S      |  32
-rw-r--r--  arch/sparc64/lib/strlen.S       |  12
-rw-r--r--  arch/sparc64/lib/strlen_user.S  |  12
-rw-r--r--  arch/sparc64/lib/xor.S          |  46
-rw-r--r--  arch/sparc64/mm/init.c          |  24
-rw-r--r--  include/asm-sparc64/system.h    |  36
19 files changed, 409 insertions(+), 742 deletions(-)
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 20f1da3251b01..a47f2d0b1a29b 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -252,7 +252,7 @@ do_fpdis:
faddd %f0, %f2, %f4
fmuld %f0, %f2, %f6
ldxa [%g3] ASI_DMMU, %g5
-cplus_fpdis_insn_1:
+cplus_fptrap_insn_1:
sethi %hi(0), %g2
stxa %g2, [%g3] ASI_DMMU
membar #Sync
@@ -280,7 +280,9 @@ cplus_fpdis_insn_1:
fzero %f34
ldxa [%g3] ASI_DMMU, %g5
add %g6, TI_FPREGS, %g1
- stxa %g0, [%g3] ASI_DMMU
+cplus_fptrap_insn_2:
+ sethi %hi(0), %g2
+ stxa %g2, [%g3] ASI_DMMU
membar #Sync
add %g6, TI_FPREGS + 0x40, %g2
faddd %f32, %f34, %f36
@@ -304,9 +306,11 @@ cplus_fpdis_insn_1:
3: mov SECONDARY_CONTEXT, %g3
add %g6, TI_FPREGS, %g1
ldxa [%g3] ASI_DMMU, %g5
- mov 0x40, %g2
- stxa %g0, [%g3] ASI_DMMU
+cplus_fptrap_insn_3:
+ sethi %hi(0), %g2
+ stxa %g2, [%g3] ASI_DMMU
membar #Sync
+ mov 0x40, %g2
ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-(
ldda [%g1 + %g2] ASI_BLK_S, %f16
add %g1, 0x80, %g1
@@ -325,24 +329,6 @@ fpdis_exit2:
wr %g0, FPRS_FEF, %fprs ! clean DU/DL bits
retry
-cplus_fpdis_1:
- sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
-
- .globl cheetah_plus_patch_fpdis
-cheetah_plus_patch_fpdis:
- /* We configure the dTLB512_0 for 4MB pages and the
- * dTLB512_1 for 8K pages when in context zero.
- */
- sethi %hi(cplus_fpdis_1), %o0
- sethi %hi(cplus_fpdis_insn_1), %o2
- lduw [%o0 + %lo(cplus_fpdis_1)], %o1
- or %o2, %lo(cplus_fpdis_insn_1), %o2
- stw %o1, [%o2]
- flush %o2
-
- retl
- nop
-
.align 32
fp_other_bounce:
call do_fpother
@@ -485,10 +471,12 @@ do_fptrap_after_fsr:
rd %gsr, %g3
stx %g3, [%g6 + TI_GSR]
mov SECONDARY_CONTEXT, %g3
- add %g6, TI_FPREGS, %g2
ldxa [%g3] ASI_DMMU, %g5
- stxa %g0, [%g3] ASI_DMMU
+cplus_fptrap_insn_4:
+ sethi %hi(0), %g2
+ stxa %g2, [%g3] ASI_DMMU
membar #Sync
+ add %g6, TI_FPREGS, %g2
andcc %g1, FPRS_DL, %g0
be,pn %icc, 4f
mov 0x40, %g3
@@ -506,6 +494,33 @@ do_fptrap_after_fsr:
ba,pt %xcc, etrap
wr %g0, 0, %fprs
+cplus_fptrap_1:
+ sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+
+ .globl cheetah_plus_patch_fpdis
+cheetah_plus_patch_fpdis:
+ /* We configure the dTLB512_0 for 4MB pages and the
+ * dTLB512_1 for 8K pages when in context zero.
+ */
+ sethi %hi(cplus_fptrap_1), %o0
+ lduw [%o0 + %lo(cplus_fptrap_1)], %o1
+
+ set cplus_fptrap_insn_1, %o2
+ stw %o1, [%o2]
+ flush %o2
+ set cplus_fptrap_insn_2, %o2
+ stw %o1, [%o2]
+ flush %o2
+ set cplus_fptrap_insn_3, %o2
+ stw %o1, [%o2]
+ flush %o2
+ set cplus_fptrap_insn_4, %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ retl
+ nop
+
/* The registers for cross calls will be:
*
* DATA 0: [low 32-bits] Address of function to call, jmp to this
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 3cf408cb1695e..d58575a978d5b 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -7,7 +7,7 @@ EXTRA_CFLAGS := -Werror
lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
- VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
+ bzero.o VIScsum.o VIScsumcopy.o \
VIScsumcopyusr.o VISsave.o atomic.o bitops.o \
U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index fffec2e3cef8e..ec0788236a816 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -7,6 +7,7 @@
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
+#define GLOBAL_SPARE %g7
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
@@ -18,6 +19,7 @@
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
+#define GLOBAL_SPARE %g5
#endif
#ifndef EX_LD
@@ -123,7 +125,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
cmp %g2, 0
tne %xcc, 5
PREAMBLE
- mov %o0, %g5
+ mov %o0, %o4
cmp %o2, 0
be,pn %XCC, 85f
or %o0, %o1, %o3
@@ -146,7 +148,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
* of bytes to copy to make 'dst' 64-byte aligned. We pre-
* subtract this from 'len'.
*/
- sub %o0, %o1, %o4
+ sub %o0, %o1, GLOBAL_SPARE
sub %g2, 0x40, %g2
sub %g0, %g2, %g2
sub %o2, %g2, %o2
@@ -156,11 +158,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1: subcc %g1, 0x1, %g1
EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST(STORE(stb, %o3, %o1 + %o4))
+ EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
- add %o1, %o4, %o0
+ add %o1, GLOBAL_SPARE, %o0
2: cmp %g2, 0x0
and %o1, 0x7, %g1
@@ -188,19 +190,19 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3:
membar #LoadStore | #StoreStore | #StoreLoad
- subcc %o2, 0x40, %o4
+ subcc %o2, 0x40, GLOBAL_SPARE
add %o1, %g1, %g1
- andncc %o4, (0x40 - 1), %o4
+ andncc GLOBAL_SPARE, (0x40 - 1), GLOBAL_SPARE
srl %g1, 3, %g2
- sub %o2, %o4, %g3
+ sub %o2, GLOBAL_SPARE, %g3
andn %o1, (0x40 - 1), %o1
and %g2, 7, %g2
andncc %g3, 0x7, %g3
fmovd %f0, %f2
sub %g3, 0x8, %g3
- sub %o2, %o4, %o2
+ sub %o2, GLOBAL_SPARE, %o2
- add %g1, %o4, %g1
+ add %g1, GLOBAL_SPARE, %g1
subcc %o2, %g3, %o2
EX_LD(LOAD_BLK(%o1, %f0))
@@ -208,7 +210,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %g1, %g3, %g1
EX_LD(LOAD_BLK(%o1, %f16))
add %o1, 0x40, %o1
- sub %o4, 0x80, %o4
+ sub GLOBAL_SPARE, 0x80, GLOBAL_SPARE
EX_LD(LOAD_BLK(%o1, %f32))
add %o1, 0x40, %o1
@@ -449,18 +451,18 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2: membar #StoreLoad | #StoreStore
VISExit
retl
- mov EX_RETVAL(%g5), %o0
+ mov EX_RETVAL(%o4), %o0
.align 64
70: /* 16 < len <= (5 * 64) */
bne,pn %XCC, 75f
sub %o0, %o1, %o3
-72: andn %o2, 0xf, %o4
+72: andn %o2, 0xf, GLOBAL_SPARE
and %o2, 0xf, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
- subcc %o4, 0x10, %o4
+ subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
EX_ST(STORE(stx, %o5, %o1 + %o3))
add %o1, 0x8, %o1
EX_ST(STORE(stx, %g1, %o1 + %o3))
@@ -512,10 +514,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2))
sub %o3, %g1, %o3
- andn %o2, 0x7, %o4
+ andn %o2, 0x7, GLOBAL_SPARE
sllx %g2, %g1, %g2
1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
- subcc %o4, 0x8, %o4
+ subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
add %o1, 0x8, %o1
srlx %g3, %o3, %o5
or %o5, %g2, %o5
@@ -544,7 +546,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %o1, 4, %o1
85: retl
- mov EX_RETVAL(%g5), %o0
+ mov EX_RETVAL(%o4), %o0
.align 32
90: EX_LD(LOAD(ldub, %o1, %g1))
@@ -553,6 +555,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
- mov EX_RETVAL(%g5), %o0
+ mov EX_RETVAL(%o4), %o0
.size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc64/lib/U3memcpy.S b/arch/sparc64/lib/U3memcpy.S
index 8fe195a10bbad..7cae9cc6a204a 100644
--- a/arch/sparc64/lib/U3memcpy.S
+++ b/arch/sparc64/lib/U3memcpy.S
@@ -6,6 +6,7 @@
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
+#define GLOBAL_SPARE %g7
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
@@ -17,6 +18,7 @@
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
+#define GLOBAL_SPARE %g5
#endif
#ifndef EX_LD
@@ -84,7 +86,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
cmp %g2, 0
tne %xcc, 5
PREAMBLE
- mov %o0, %g5
+ mov %o0, %o4
cmp %o2, 0
be,pn %XCC, 85f
or %o0, %o1, %o3
@@ -109,7 +111,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
* of bytes to copy to make 'dst' 64-byte aligned. We pre-
* subtract this from 'len'.
*/
- sub %o0, %o1, %o4
+ sub %o0, %o1, GLOBAL_SPARE
sub %g2, 0x40, %g2
sub %g0, %g2, %g2
sub %o2, %g2, %o2
@@ -119,11 +121,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1: subcc %g1, 0x1, %g1
EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST(STORE(stb, %o3, %o1 + %o4))
+ EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
- add %o1, %o4, %o0
+ add %o1, GLOBAL_SPARE, %o0
2: cmp %g2, 0x0
and %o1, 0x7, %g1
@@ -149,7 +151,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3: LOAD(prefetch, %o1 + 0x000, #one_read)
LOAD(prefetch, %o1 + 0x040, #one_read)
- andn %o2, (0x40 - 1), %o4
+ andn %o2, (0x40 - 1), GLOBAL_SPARE
LOAD(prefetch, %o1 + 0x080, #one_read)
LOAD(prefetch, %o1 + 0x0c0, #one_read)
LOAD(prefetch, %o1 + 0x100, #one_read)
@@ -173,10 +175,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
faligndata %f10, %f12, %f26
EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
- subcc %o4, 0x80, %o4
+ subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
add %o1, 0x40, %o1
bgu,pt %XCC, 1f
- srl %o4, 6, %o3
+ srl GLOBAL_SPARE, 6, %o3
ba,pt %xcc, 2f
nop
@@ -315,9 +317,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o0, %o1, %o3
72:
- andn %o2, 0xf, %o4
+ andn %o2, 0xf, GLOBAL_SPARE
and %o2, 0xf, %o2
-1: subcc %o4, 0x10, %o4
+1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
EX_ST(STORE(stx, %o5, %o1 + %o3))
@@ -372,10 +374,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2))
sub %o3, %g1, %o3
- andn %o2, 0x7, %o4
+ andn %o2, 0x7, GLOBAL_SPARE
sllx %g2, %g1, %g2
1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
- subcc %o4, 0x8, %o4
+ subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
add %o1, 0x8, %o1
srlx %g3, %o3, %o5
or %o5, %g2, %o5
@@ -405,7 +407,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %o1, 4, %o1
85: retl
- mov EX_RETVAL(%g5), %o0
+ mov EX_RETVAL(%o4), %o0
.align 32
90:
@@ -415,6 +417,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
- mov EX_RETVAL(%g5), %o0
+ mov EX_RETVAL(%o4), %o0
.size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc64/lib/VISbzero.S b/arch/sparc64/lib/VISbzero.S
deleted file mode 100644
index 06b697bab974b..0000000000000
--- a/arch/sparc64/lib/VISbzero.S
+++ /dev/null
@@ -1,274 +0,0 @@
-/* $Id: VISbzero.S,v 1.11 2001/03/15 08:51:24 anton Exp $
- * VISbzero.S: High speed clear operations utilizing the UltraSparc
- * Visual Instruction Set.
- *
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include "VIS.h"
-
-#ifdef __KERNEL__
-#include <asm/visasm.h>
-
-#define EXN(x,y,a,b,z) \
-98: x,y; \
- .section .fixup; \
- .align 4; \
-99: ba VISbzerofixup_ret##z; \
- a, b, %o0; \
- .section __ex_table; \
- .align 4; \
- .word 98b, 99b; \
- .text; \
- .align 4;
-#define EXC(x,y,a,b,c...) \
-98: x,y; \
- .section .fixup; \
- .align 4; \
-99: c; \
- ba VISbzerofixup_ret0; \
- a, b, %o0; \
- .section __ex_table; \
- .align 4; \
- .word 98b, 99b; \
- .text; \
- .align 4;
-#define EXO1(x,y) \
-98: x,y; \
- .section __ex_table; \
- .align 4; \
- .word 98b, VISbzerofixup_reto1; \
- .text; \
- .align 4;
-#define EX(x,y,a,b) EXN(x,y,a,b,0)
-#define EX1(x,y,a,b) EXN(x,y,a,b,1)
-#define EX2(x,y,a,b) EXN(x,y,a,b,2)
-#define EXT(start,end,handler) \
- .section __ex_table; \
- .align 4; \
- .word start, 0, end, handler; \
- .text; \
- .align 4
-#else
-#define EX(x,y,a,b) x,y
-#define EX1(x,y,a,b) x,y
-#define EX2(x,y,a,b) x,y
-#define EXC(x,y,a,b,c...) x,y
-#define EXO1(x,y) x,y
-#define EXT(a,b,c)
-#endif
-
-#define ZERO_BLOCKS(base, offset, source) \
- STX source, [base - offset - 0x38] ASINORMAL; \
- STX source, [base - offset - 0x30] ASINORMAL; \
- STX source, [base - offset - 0x28] ASINORMAL; \
- STX source, [base - offset - 0x20] ASINORMAL; \
- STX source, [base - offset - 0x18] ASINORMAL; \
- STX source, [base - offset - 0x10] ASINORMAL; \
- STX source, [base - offset - 0x08] ASINORMAL; \
- STX source, [base - offset - 0x00] ASINORMAL;
-
-#ifdef __KERNEL__
-#define RETL clr %o0
-#else
-#define RETL mov %g3, %o0
-#endif
-
- /* Well, bzero is a lot easier to get right than bcopy... */
-#ifdef __KERNEL__
- .section __ex_table,#alloc
- .section .fixup,#alloc,#execinstr
-#endif
- .text
- .align 32
-#ifdef __KERNEL__
- .globl __bzero, __bzero_noasi
-__bzero_noasi:
- rd %asi, %g5
- ba,pt %xcc, __bzero+12
- mov %g5, %o4
-__bzero:
- rd %asi, %g5
- wr %g0, ASI_P, %asi ! LSU Group
- mov ASI_P, %o4
-#else
- .globl bzero
-bzero_private:
-bzero:
-#ifndef REGS_64BIT
- srl %o1, 0, %o1
-#endif
- mov %o0, %g3
-#endif
- cmp %o1, 7
- bleu,pn %xcc, 17f
- andcc %o0, 3, %o2
- be,a,pt %xcc, 4f
- andcc %o0, 4, %g0
- cmp %o2, 3
- be,pn %xcc, 2f
- EXO1(STB %g0, [%o0 + 0x00] ASINORMAL)
- cmp %o2, 2
- be,pt %xcc, 2f
- EX(STB %g0, [%o0 + 0x01] ASINORMAL, sub %o1, 1)
- EX(STB %g0, [%o0 + 0x02] ASINORMAL, sub %o1, 2)
-2: sub %o2, 4, %o2
- sub %o0, %o2, %o0
- add %o1, %o2, %o1
- andcc %o0, 4, %g0
-4: be,pt %xcc, 2f
- cmp %o1, 128
- EXO1(STW %g0, [%o0] ASINORMAL)
- sub %o1, 4, %o1
- add %o0, 4, %o0
-2: blu,pn %xcc, 9f
- andcc %o0, 0x38, %o2
- be,pn %icc, 6f
- mov 64, %o5
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o5, %o2, %o5
- EX(STX %g0, [%o0] ASINORMAL, sub %o1, 0)
- add %o0, 8, %o0
-1: andcc %o5, 16, %g0
- be,pn %icc, 1f
- sub %o1, %o5, %o1
- EX1(STX %g0, [%o0] ASINORMAL, add %g0, 0)
- EX1(STX %g0, [%o0 + 8] ASINORMAL, sub %g0, 8)
- add %o0, 16, %o0
-1: andcc %o5, 32, %g0
- be,pn %icc, 7f
- andncc %o1, 0x3f, %o3
- EX(STX %g0, [%o0] ASINORMAL, add %o1, 32)
- EX(STX %g0, [%o0 + 8] ASINORMAL, add %o1, 24)
- EX(STX %g0, [%o0 + 16] ASINORMAL, add %o1, 16)
- EX(STX %g0, [%o0 + 24] ASINORMAL, add %o1, 8)
- add %o0, 32, %o0
-6: andncc %o1, 0x3f, %o3
-7: be,pn %xcc, 9f
-#ifdef __KERNEL__
- or %o4, ASI_BLK_OR, %g7
- wr %g7, %g0, %asi
- VISEntryHalf
-#else
- wr %g0, ASI_BLK_P, %asi
-#endif
- membar #StoreLoad | #StoreStore | #LoadStore
- fzero %f0
- andcc %o3, 0xc0, %o2
- and %o1, 0x3f, %o1
- fzero %f2
- andn %o3, 0xff, %o3
- faddd %f0, %f2, %f4
- fmuld %f0, %f2, %f6
- cmp %o2, 64
- faddd %f0, %f2, %f8
- fmuld %f0, %f2, %f10
- faddd %f0, %f2, %f12
- brz,pn %o2, 10f
- fmuld %f0, %f2, %f14
- be,pn %icc, 2f
- EXC(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o2, add %o2, %o1, %o2)
- cmp %o2, 128
- be,pn %icc, 2f
- EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 64, %o2)
- EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 128, %o2)
-2: brz,pn %o3, 12f
- add %o0, %o2, %o0
-10: EX(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o1)
- EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o1, sub %o1, 64, %o1)
- EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o1, sub %o1, 128, %o1)
- EXC(STBLK %f0, [%o0 + 0xc0] ASIBLK, add %o3, %o1, sub %o1, 192, %o1)
-11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0
-12:
-#ifdef __KERNEL__
- VISExitHalf
- wr %o4, 0x0, %asi
-#else
-#ifndef REGS_64BIT
- wr %g0, FPRS_FEF, %fprs
-#endif
-#endif
- membar #StoreLoad | #StoreStore
-9: andcc %o1, 0xf8, %o2
- be,pn %xcc, 13f
- andcc %o1, 7, %o1
-#ifdef __KERNEL__
-14: sethi %hi(13f), %o4
- srl %o2, 1, %o3
- sub %o4, %o3, %o4
- jmpl %o4 + %lo(13f), %g0
- add %o0, %o2, %o0
-#else
-14: rd %pc, %o4
- srl %o2, 1, %o3
- sub %o4, %o3, %o4
- jmpl %o4 + (13f - 14b), %g0
- add %o0, %o2, %o0
-#endif
-12: ZERO_BLOCKS(%o0, 0xc8, %g0)
- ZERO_BLOCKS(%o0, 0x88, %g0)
- ZERO_BLOCKS(%o0, 0x48, %g0)
- ZERO_BLOCKS(%o0, 0x08, %g0)
- EXT(12b,13f,VISbzerofixup_zb)
-13: be,pn %xcc, 8f
- andcc %o1, 4, %g0
- be,pn %xcc, 1f
- andcc %o1, 2, %g0
- EX(STW %g0, [%o0] ASINORMAL, and %o1, 7)
- add %o0, 4, %o0
-1: be,pn %xcc, 1f
- andcc %o1, 1, %g0
- EX(STH %g0, [%o0] ASINORMAL, and %o1, 3)
- add %o0, 2, %o0
-1: bne,a,pn %xcc, 8f
- EX(STB %g0, [%o0] ASINORMAL, add %g0, 1)
-8:
-#ifdef __KERNEL__
- wr %g5, %g0, %asi
-#endif
- retl
- RETL
-17: be,pn %xcc, 13b
- orcc %o1, 0, %g0
- be,pn %xcc, 0f
-8: add %o0, 1, %o0
- subcc %o1, 1, %o1
- bne,pt %xcc, 8b
- EX(STB %g0, [%o0 - 1] ASINORMAL, add %o1, 1)
-0:
-#ifdef __KERNEL__
- wr %g5, %g0, %asi
-#endif
- retl
- RETL
-
-#ifdef __KERNEL__
- .section .fixup
- .align 4
-VISbzerofixup_reto1:
- mov %o1, %o0
-VISbzerofixup_ret0:
- wr %g5, %g0, %asi
- retl
- wr %g0, 0, %fprs
-VISbzerofixup_ret1:
- and %o5, 0x30, %o5
- add %o5, %o1, %o5
- ba,pt %xcc, VISbzerofixup_ret0
- add %o0, %o5, %o0
-VISbzerofixup_ret2:
- and %o5, 0x20, %o5
- add %o5, %o1, %o5
- ba,pt %xcc, VISbzerofixup_ret0
- add %o0, %o5, %o0
-VISbzerofixup_zb:
- andcc %o1, 7, %o1
- sll %g2, 3, %g2
- add %o1, 256, %o1
- ba,pt %xcc, VISbzerofixup_ret0
- sub %o1, %g2, %o0
-#endif
diff --git a/arch/sparc64/lib/VISmemset.S b/arch/sparc64/lib/VISmemset.S
deleted file mode 100644
index 152723a490141..0000000000000
--- a/arch/sparc64/lib/VISmemset.S
+++ /dev/null
@@ -1,240 +0,0 @@
-/* $Id: VISmemset.S,v 1.10 1999/12/23 17:02:16 jj Exp $
- * VISmemset.S: High speed memset operations utilizing the UltraSparc
- * Visual Instruction Set.
- *
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include "VIS.h"
-
-#ifdef REGS_64BIT
-#define SET_BLOCKS(base, offset, source) \
- stx source, [base - offset - 0x18]; \
- stx source, [base - offset - 0x10]; \
- stx source, [base - offset - 0x08]; \
- stx source, [base - offset - 0x00];
-#else
-#define SET_BLOCKS(base, offset, source) \
- stw source, [base - offset - 0x18]; \
- stw source, [base - offset - 0x14]; \
- stw source, [base - offset - 0x10]; \
- stw source, [base - offset - 0x0c]; \
- stw source, [base - offset - 0x08]; \
- stw source, [base - offset - 0x04]; \
- stw source, [base - offset - 0x00]; \
- stw source, [base - offset + 0x04];
-#endif
-
-#ifndef __KERNEL__
-/* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */
-#include "VISbzero.S"
-#endif
-
-#ifdef __KERNEL__
-#include <asm/visasm.h>
-#endif
-
- /* Well, memset is a lot easier to get right than bcopy... */
- .text
- .align 32
-#ifdef __KERNEL__
- .globl __memset
-__memset:
-#endif
- .globl memset
-memset:
-#ifndef __KERNEL__
- brz,a,pt %o1, bzero_private
- mov %o2, %o1
-#ifndef REGS_64BIT
- srl %o2, 0, %o2
-#endif
-#endif
- mov %o0, %o4
- cmp %o2, 7
- bleu,pn %xcc, 17f
- andcc %o0, 3, %g5
- be,pt %xcc, 4f
- and %o1, 0xff, %o1
- cmp %g5, 3
- be,pn %xcc, 2f
- stb %o1, [%o0 + 0x00]
- cmp %g5, 2
- be,pt %xcc, 2f
- stb %o1, [%o0 + 0x01]
- stb %o1, [%o0 + 0x02]
-2: sub %g5, 4, %g5
- sub %o0, %g5, %o0
- add %o2, %g5, %o2
-4: sllx %o1, 8, %g1
- andcc %o0, 4, %g0
- or %o1, %g1, %o1
- sllx %o1, 16, %g1
- or %o1, %g1, %o1
- be,pt %xcc, 2f
-#ifdef REGS_64BIT
- sllx %o1, 32, %g1
-#else
- cmp %o2, 128
-#endif
- stw %o1, [%o0]
- sub %o2, 4, %o2
- add %o0, 4, %o0
-2:
-#ifdef REGS_64BIT
- cmp %o2, 128
- or %o1, %g1, %o1
-#endif
- blu,pn %xcc, 9f
- andcc %o0, 0x38, %g5
- be,pn %icc, 6f
- mov 64, %o5
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o5, %g5, %o5
-#ifdef REGS_64BIT
- stx %o1, [%o0]
-#else
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
-#endif
- add %o0, 8, %o0
-1: andcc %o5, 16, %g0
- be,pn %icc, 1f
- sub %o2, %o5, %o2
-#ifdef REGS_64BIT
- stx %o1, [%o0]
- stx %o1, [%o0 + 8]
-#else
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
- stw %o1, [%o0 + 8]
- stw %o1, [%o0 + 12]
-#endif
- add %o0, 16, %o0
-1: andcc %o5, 32, %g0
- be,pn %icc, 7f
- andncc %o2, 0x3f, %o3
-#ifdef REGS_64BIT
- stx %o1, [%o0]
- stx %o1, [%o0 + 8]
- stx %o1, [%o0 + 16]
- stx %o1, [%o0 + 24]
-#else
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
- stw %o1, [%o0 + 8]
- stw %o1, [%o0 + 12]
- stw %o1, [%o0 + 16]
- stw %o1, [%o0 + 20]
- stw %o1, [%o0 + 24]
- stw %o1, [%o0 + 28]
-#endif
- add %o0, 32, %o0
-7: be,pn %xcc, 9f
- nop
-#ifdef __KERNEL__
- VISEntryHalf
-#endif
- ldd [%o0 - 8], %f0
-18: rd %asi, %g2
- wr %g0, ASI_BLK_P, %asi
- membar #StoreStore | #LoadStore
- andcc %o3, 0xc0, %g5
- and %o2, 0x3f, %o2
- fmovd %f0, %f2
- fmovd %f0, %f4
- andn %o3, 0xff, %o3
- fmovd %f0, %f6
- cmp %g5, 64
- fmovd %f0, %f8
- fmovd %f0, %f10
- fmovd %f0, %f12
- brz,pn %g5, 10f
- fmovd %f0, %f14
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x00] %asi
- cmp %g5, 128
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
-2: brz,pn %o3, 12f
- add %o0, %g5, %o0
-10: stda %f0, [%o0 + 0x00] %asi
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
- stda %f0, [%o0 + 0xc0] %asi
-11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0
-12:
-#ifdef __KERNEL__
- wr %g2, %g0, %asi
- VISExitHalf
-#else
-#ifndef REGS_64BIT
- wr %g0, FPRS_FEF, %fprs
-#endif
-#endif
- membar #StoreLoad | #StoreStore
-9: andcc %o2, 0x78, %g5
- be,pn %xcc, 13f
- andcc %o2, 7, %o2
-#ifdef __KERNEL__
-14: srl %g5, 1, %o3
- sethi %hi(13f), %g3
- sub %g3, %o3, %g3
- jmpl %g3 + %lo(13f), %g0
- add %o0, %g5, %o0
-#else
-14: rd %pc, %g3
-#ifdef REGS_64BIT
- srl %g5, 1, %o3
- sub %g3, %o3, %g3
-#else
- sub %g3, %g5, %g3
-#endif
- jmpl %g3 + (13f - 14b), %g0
- add %o0, %g5, %o0
-#endif
-12: SET_BLOCKS(%o0, 0x68, %o1)
- SET_BLOCKS(%o0, 0x48, %o1)
- SET_BLOCKS(%o0, 0x28, %o1)
- SET_BLOCKS(%o0, 0x08, %o1)
-13: be,pn %xcc, 8f
- andcc %o2, 4, %g0
- be,pn %xcc, 1f
- andcc %o2, 2, %g0
- stw %o1, [%o0]
- add %o0, 4, %o0
-1: be,pn %xcc, 1f
- andcc %o2, 1, %g0
- sth %o1, [%o0]
- add %o0, 2, %o0
-1: bne,a,pn %xcc, 8f
- stb %o1, [%o0]
-8: retl
- mov %o4, %o0
-17: brz,pn %o2, 0f
-8: add %o0, 1, %o0
- subcc %o2, 1, %o2
- bne,pt %xcc, 8b
- stb %o1, [%o0 - 1]
-0: retl
- mov %o4, %o0
-6:
-#ifdef REGS_64BIT
- stx %o1, [%o0]
-#else
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
-#endif
- andncc %o2, 0x3f, %o3
- be,pn %xcc, 9b
- nop
-#ifdef __KERNEL__
- VISEntryHalf
-#endif
- ba,pt %xcc, 18b
- ldd [%o0], %f0
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index 41be4131f8008..e528b8d1a3e69 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -29,10 +29,10 @@
.globl atomic_add
.type atomic_add,#function
atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
-1: lduw [%o1], %g5
- add %g5, %o0, %g7
- cas [%o1], %g5, %g7
- cmp %g5, %g7
+1: lduw [%o1], %g1
+ add %g1, %o0, %g7
+ cas [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %icc, 1b
nop
retl
@@ -42,10 +42,10 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
.globl atomic_sub
.type atomic_sub,#function
atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
-1: lduw [%o1], %g5
- sub %g5, %o0, %g7
- cas [%o1], %g5, %g7
- cmp %g5, %g7
+1: lduw [%o1], %g1
+ sub %g1, %o0, %g7
+ cas [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %icc, 1b
nop
retl
@@ -56,10 +56,10 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
.type atomic_add_ret,#function
atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
ATOMIC_PRE_BARRIER
-1: lduw [%o1], %g5
- add %g5, %o0, %g7
- cas [%o1], %g5, %g7
- cmp %g5, %g7
+1: lduw [%o1], %g1
+ add %g1, %o0, %g7
+ cas [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %icc, 1b
add %g7, %o0, %g7
ATOMIC_POST_BARRIER
@@ -71,10 +71,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
.type atomic_sub_ret,#function
atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
ATOMIC_PRE_BARRIER
-1: lduw [%o1], %g5
- sub %g5, %o0, %g7
- cas [%o1], %g5, %g7
- cmp %g5, %g7
+1: lduw [%o1], %g1
+ sub %g1, %o0, %g7
+ cas [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %icc, 1b
sub %g7, %o0, %g7
ATOMIC_POST_BARRIER
@@ -85,10 +85,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
.globl atomic64_add
.type atomic64_add,#function
atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */
-1: ldx [%o1], %g5
- add %g5, %o0, %g7
- casx [%o1], %g5, %g7
- cmp %g5, %g7
+1: ldx [%o1], %g1
+ add %g1, %o0, %g7
+ casx [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %xcc, 1b
nop
retl
@@ -98,10 +98,10 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */
.globl atomic64_sub
.type atomic64_sub,#function
atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */
-1: ldx [%o1], %g5
- sub %g5, %o0, %g7
- casx [%o1], %g5, %g7
- cmp %g5, %g7
+1: ldx [%o1], %g1
+ sub %g1, %o0, %g7
+ casx [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %xcc, 1b
nop
retl
@@ -112,10 +112,10 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */
.type atomic64_add_ret,#function
atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
ATOMIC_PRE_BARRIER
-1: ldx [%o1], %g5
- add %g5, %o0, %g7
- casx [%o1], %g5, %g7
- cmp %g5, %g7
+1: ldx [%o1], %g1
+ add %g1, %o0, %g7
+ casx [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %xcc, 1b
add %g7, %o0, %g7
ATOMIC_POST_BARRIER
@@ -127,10 +127,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
.type atomic64_sub_ret,#function
atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
ATOMIC_PRE_BARRIER
-1: ldx [%o1], %g5
- sub %g5, %o0, %g7
- casx [%o1], %g5, %g7
- cmp %g5, %g7
+1: ldx [%o1], %g1
+ sub %g1, %o0, %g7
+ casx [%o1], %g1, %g7
+ cmp %g1, %g7
bne,pn %xcc, 1b
sub %g7, %o0, %g7
ATOMIC_POST_BARRIER
diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S
new file mode 100644
index 0000000000000..21a933ffb7c29
--- /dev/null
+++ b/arch/sparc64/lib/bzero.S
@@ -0,0 +1,158 @@
+/* bzero.S: Simple prefetching memset, bzero, and clear_user
+ * implementations.
+ *
+ * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ */
+
+ .text
+
+ .globl __memset
+ .type __memset, #function
+__memset: /* %o0=buf, %o1=pat, %o2=len */
+
+ .globl memset
+ .type memset, #function
+memset: /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %xcc, 1f
+ or %g1, %o2, %o2
+
+ .globl __bzero
+ .type __bzero, #function
+__bzero: /* %o0=buf, %o1=len */
+ clr %o2
+1: mov %o0, %o3
+ brz,pn %o1, __bzero_done
+ cmp %o1, 16
+ bl,pn %icc, __bzero_tiny
+ prefetch [%o0 + 0x000], #n_writes
+ andcc %o0, 0x3, %g0
+ be,pt %icc, 2f
+1: stb %o2, [%o0 + 0x00]
+ add %o0, 1, %o0
+ andcc %o0, 0x3, %g0
+ bne,pn %icc, 1b
+ sub %o1, 1, %o1
+2: andcc %o0, 0x7, %g0
+ be,pt %icc, 3f
+ stw %o2, [%o0 + 0x00]
+ sub %o1, 4, %o1
+ add %o0, 4, %o0
+3: and %o1, 0x38, %g1
+ cmp %o1, 0x40
+ andn %o1, 0x3f, %o4
+ bl,pn %icc, 5f
+ and %o1, 0x7, %o1
+ prefetch [%o0 + 0x040], #n_writes
+ prefetch [%o0 + 0x080], #n_writes
+ prefetch [%o0 + 0x0c0], #n_writes
+ prefetch [%o0 + 0x100], #n_writes
+ prefetch [%o0 + 0x140], #n_writes
+4: prefetch [%o0 + 0x180], #n_writes
+ stx %o2, [%o0 + 0x00]
+ stx %o2, [%o0 + 0x08]
+ stx %o2, [%o0 + 0x10]
+ stx %o2, [%o0 + 0x18]
+ stx %o2, [%o0 + 0x20]
+ stx %o2, [%o0 + 0x28]
+ stx %o2, [%o0 + 0x30]
+ stx %o2, [%o0 + 0x38]
+ subcc %o4, 0x40, %o4
+ bne,pt %icc, 4b
+ add %o0, 0x40, %o0
+ brz,pn %g1, 6f
+ nop
+5: stx %o2, [%o0 + 0x00]
+ subcc %g1, 8, %g1
+ bne,pt %icc, 5b
+ add %o0, 0x8, %o0
+6: brz,pt %o1, __bzero_done
+ nop
+__bzero_tiny:
+1: stb %o2, [%o0 + 0x00]
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+__bzero_done:
+ retl
+ mov %o3, %o0
+ .size __bzero, .-__bzero
+ .size __memset, .-__memset
+ .size memset, .-memset
+
+#define EX_ST(x,y) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov %o1, %o0; \
+ .section __ex_table; \
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+ .globl __bzero_noasi
+ .type __bzero_noasi, #function
+__bzero_noasi: /* %o0=buf, %o1=len */
+ brz,pn %o1, __bzero_noasi_done
+ cmp %o1, 16
+ bl,pn %icc, __bzero_noasi_tiny
+ EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes)
+ andcc %o0, 0x3, %g0
+ be,pt %icc, 2f
+1: EX_ST(stba %g0, [%o0 + 0x00] %asi)
+ add %o0, 1, %o0
+ andcc %o0, 0x3, %g0
+ bne,pn %icc, 1b
+ sub %o1, 1, %o1
+2: andcc %o0, 0x7, %g0
+ be,pt %icc, 3f
+ EX_ST(stwa %g0, [%o0 + 0x00] %asi)
+ sub %o1, 4, %o1
+ add %o0, 4, %o0
+3: and %o1, 0x38, %g1
+ cmp %o1, 0x40
+ andn %o1, 0x3f, %o4
+ bl,pn %icc, 5f
+ and %o1, 0x7, %o1
+ EX_ST(prefetcha [%o0 + 0x040] %asi, #n_writes)
+ EX_ST(prefetcha [%o0 + 0x080] %asi, #n_writes)
+ EX_ST(prefetcha [%o0 + 0x0c0] %asi, #n_writes)
+ EX_ST(prefetcha [%o0 + 0x100] %asi, #n_writes)
+ EX_ST(prefetcha [%o0 + 0x140] %asi, #n_writes)
+4: EX_ST(prefetcha [%o0 + 0x180] %asi, #n_writes)
+ EX_ST(stxa %g0, [%o0 + 0x00] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x08] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x10] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x18] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x20] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x28] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x30] %asi)
+ EX_ST(stxa %g0, [%o0 + 0x38] %asi)
+ subcc %o4, 0x40, %o4
+ bne,pt %icc, 4b
+ add %o0, 0x40, %o0
+ brz,pn %g1, 6f
+ nop
+5: EX_ST(stxa %g0, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %icc, 5b
+ add %o0, 0x8, %o0
+6: brz,pt %o1, __bzero_noasi_done
+ nop
+__bzero_noasi_tiny:
+1: EX_ST(stba %g0, [%o0 + 0x00] %asi)
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+__bzero_noasi_done:
+ retl
+ clr %o0
+ .size __bzero_noasi, .-__bzero_noasi
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index 46e5ebfb4b7ce..c421e0c653253 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -138,15 +138,15 @@ wlock_again:
}
/* Try once to increment the counter. */
__asm__ __volatile__(
-" ldx [%0], %%g5\n"
-" brlz,a,pn %%g5, 2f\n"
+" ldx [%0], %%g1\n"
+" brlz,a,pn %%g1, 2f\n"
" mov 1, %0\n"
-" add %%g5, 1, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
-" sub %%g5, %%g7, %0\n"
+" add %%g1, 1, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
+" sub %%g1, %%g7, %0\n"
"2:" : "=r" (val)
: "0" (&(rw->lock))
- : "g5", "g7", "memory");
+ : "g1", "g7", "memory");
membar("#StoreLoad | #StoreStore");
if (val)
goto wlock_again;
@@ -173,14 +173,14 @@ runlock_again:
/* Spin trying to decrement the counter using casx. */
__asm__ __volatile__(
" membar #StoreLoad | #LoadLoad\n"
-" ldx [%0], %%g5\n"
-" sub %%g5, 1, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
+" ldx [%0], %%g1\n"
+" sub %%g1, 1, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
-" sub %%g5, %%g7, %0\n"
+" sub %%g1, %%g7, %0\n"
: "=r" (val)
: "0" (&(rw->lock))
- : "g5", "g7", "memory");
+ : "g1", "g7", "memory");
if (val) {
if (!--stuck) {
if (shown++ <= 2)
@@ -216,17 +216,17 @@ wlock_again:
__asm__ __volatile__(
" mov 1, %%g3\n"
" sllx %%g3, 63, %%g3\n"
-" ldx [%0], %%g5\n"
-" brlz,pn %%g5, 1f\n"
-" or %%g5, %%g3, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
+" ldx [%0], %%g1\n"
+" brlz,pn %%g1, 1f\n"
+" or %%g1, %%g3, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
" ba,pt %%xcc, 2f\n"
-" sub %%g5, %%g7, %0\n"
+" sub %%g1, %%g7, %0\n"
"1: mov 1, %0\n"
"2:" : "=r" (val)
: "0" (&(rw->lock))
- : "g3", "g5", "g7", "memory");
+ : "g3", "g1", "g7", "memory");
if (val) {
/* We couldn't get the write bit. */
if (!--stuck) {
@@ -248,15 +248,15 @@ wlock_again:
__asm__ __volatile__(
" mov 1, %%g3\n"
" sllx %%g3, 63, %%g3\n"
-"1: ldx [%0], %%g5\n"
-" andn %%g5, %%g3, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
-" cmp %%g5, %%g7\n"
+"1: ldx [%0], %%g1\n"
+" andn %%g1, %%g3, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
+" cmp %%g1, %%g7\n"
" bne,pn %%xcc, 1b\n"
" membar #StoreLoad | #StoreStore"
: /* no outputs */
: "r" (&(rw->lock))
- : "g3", "g5", "g7", "cc", "memory");
+ : "g3", "g1", "g7", "cc", "memory");
while(rw->lock != 0) {
if (!--stuck) {
if (shown++ <= 2)
@@ -294,14 +294,14 @@ wlock_again:
" membar #StoreLoad | #LoadLoad\n"
" mov 1, %%g3\n"
" sllx %%g3, 63, %%g3\n"
-" ldx [%0], %%g5\n"
-" andn %%g5, %%g3, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
+" ldx [%0], %%g1\n"
+" andn %%g1, %%g3, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
-" sub %%g5, %%g7, %0\n"
+" sub %%g1, %%g7, %0\n"
: "=r" (val)
: "0" (&(rw->lock))
- : "g3", "g5", "g7", "memory");
+ : "g3", "g1", "g7", "memory");
if (val) {
if (!--stuck) {
if (shown++ <= 2)
@@ -323,17 +323,17 @@ int _do_write_trylock (rwlock_t *rw, char *str)
__asm__ __volatile__(
" mov 1, %%g3\n"
" sllx %%g3, 63, %%g3\n"
-" ldx [%0], %%g5\n"
-" brlz,pn %%g5, 1f\n"
-" or %%g5, %%g3, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
+" ldx [%0], %%g1\n"
+" brlz,pn %%g1, 1f\n"
+" or %%g1, %%g3, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
" ba,pt %%xcc, 2f\n"
-" sub %%g5, %%g7, %0\n"
+" sub %%g1, %%g7, %0\n"
"1: mov 1, %0\n"
"2:" : "=r" (val)
: "0" (&(rw->lock))
- : "g3", "g5", "g7", "memory");
+ : "g3", "g1", "g7", "memory");
if (val) {
put_cpu();
@@ -347,15 +347,15 @@ int _do_write_trylock (rwlock_t *rw, char *str)
__asm__ __volatile__(
" mov 1, %%g3\n"
" sllx %%g3, 63, %%g3\n"
-"1: ldx [%0], %%g5\n"
-" andn %%g5, %%g3, %%g7\n"
-" casx [%0], %%g5, %%g7\n"
-" cmp %%g5, %%g7\n"
+"1: ldx [%0], %%g1\n"
+" andn %%g1, %%g3, %%g7\n"
+" casx [%0], %%g1, %%g7\n"
+" cmp %%g1, %%g7\n"
" bne,pn %%xcc, 1b\n"
" membar #StoreLoad | #StoreStore"
: /* no outputs */
: "r" (&(rw->lock))
- : "g3", "g5", "g7", "cc", "memory");
+ : "g3", "g1", "g7", "cc", "memory");
put_cpu();
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S
index e86906744cf6f..7e6fdaebedbab 100644
--- a/arch/sparc64/lib/dec_and_lock.S
+++ b/arch/sparc64/lib/dec_and_lock.S
@@ -27,12 +27,12 @@
.globl _atomic_dec_and_lock
_atomic_dec_and_lock: /* %o0 = counter, %o1 = lock */
-loop1: lduw [%o0], %g5
- subcc %g5, 1, %g7
+loop1: lduw [%o0], %g2
+ subcc %g2, 1, %g7
be,pn %icc, start_to_zero
nop
-nzero: cas [%o0], %g5, %g7
- cmp %g5, %g7
+nzero: cas [%o0], %g2, %g7
+ cmp %g2, %g7
bne,pn %icc, loop1
mov 0, %g1
@@ -50,13 +50,13 @@ to_zero:
ldstub [%o1], %g3
brnz,pn %g3, spin_on_lock
membar #StoreLoad | #StoreStore
-loop2: cas [%o0], %g5, %g7 /* ASSERT(g7 == 0) */
- cmp %g5, %g7
+loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
+ cmp %g2, %g7
be,pt %icc, out
mov 1, %g1
- lduw [%o0], %g5
- subcc %g5, 1, %g7
+ lduw [%o0], %g2
+ subcc %g2, 1, %g7
be,pn %icc, loop2
nop
membar #StoreStore | #LoadStore
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 4e8c7928c49f1..2ef2e268bdcfd 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -38,22 +38,22 @@ _mcount:
* Check whether %sp is dangerously low.
*/
ldub [%g6 + TI_FPDEPTH], %g1
- srl %g1, 1, %g5
- add %g5, 1, %g5
- sllx %g5, 8, %g5 ! each fpregs frame is 256b
- add %g5, 192, %g5
- add %g6, %g5, %g5 ! where does task_struct+frame end?
- sub %g5, STACK_BIAS, %g5
- cmp %sp, %g5
+ srl %g1, 1, %g3
+ add %g3, 1, %g3
+ sllx %g3, 8, %g3 ! each fpregs frame is 256b
+ add %g3, 192, %g3
+ add %g6, %g3, %g3 ! where does task_struct+frame end?
+ sub %g3, STACK_BIAS, %g3
+ cmp %sp, %g3
bg,pt %xcc, 1f
- sethi %hi(panicstring), %g5
+ sethi %hi(panicstring), %g3
sethi %hi(ovstack), %g7 ! cant move to panic stack fast enough
or %g7, %lo(ovstack), %g7
add %g7, OVSTACKSIZE, %g7
sub %g7, STACK_BIAS, %g7
mov %g7, %sp
call prom_printf
- or %g5, %lo(panicstring), %o0
+ or %g3, %lo(panicstring), %o0
call prom_halt
nop
#endif
diff --git a/arch/sparc64/lib/memcmp.S b/arch/sparc64/lib/memcmp.S
index d34dc3d874dae..c90ad96c51b9c 100644
--- a/arch/sparc64/lib/memcmp.S
+++ b/arch/sparc64/lib/memcmp.S
@@ -13,12 +13,12 @@ memcmp:
cmp %o2, 0 ! IEU1 Group
loop: be,pn %icc, ret_0 ! CTI
nop ! IEU0
- ldub [%o0], %g5 ! LSU Group
+ ldub [%o0], %g7 ! LSU Group
ldub [%o1], %g3 ! LSU Group
sub %o2, 1, %o2 ! IEU0
add %o0, 1, %o0 ! IEU1
add %o1, 1, %o1 ! IEU0 Group
- subcc %g5, %g3, %g3 ! IEU1 Group
+ subcc %g7, %g3, %g3 ! IEU1 Group
be,pt %icc, loop ! CTI
cmp %o2, 0 ! IEU1 Group
diff --git a/arch/sparc64/lib/memmove.S b/arch/sparc64/lib/memmove.S
index 1c1ebbbdf830e..97395802c23c4 100644
--- a/arch/sparc64/lib/memmove.S
+++ b/arch/sparc64/lib/memmove.S
@@ -12,17 +12,17 @@ memmove: /* o0=dst o1=src o2=len */
mov %o0, %g1
cmp %o0, %o1
bleu,pt %xcc, memcpy
- add %o1, %o2, %g5
- cmp %g5, %o0
+ add %o1, %o2, %g7
+ cmp %g7, %o0
bleu,pt %xcc, memcpy
add %o0, %o2, %o5
- sub %g5, 1, %o1
+ sub %g7, 1, %o1
sub %o5, 1, %o0
-1: ldub [%o1], %g5
+1: ldub [%o1], %g7
subcc %o2, 1, %o2
sub %o1, 1, %o1
- stb %g5, [%o0]
+ stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S
index a34c6b9d21e85..5e72d49114179 100644
--- a/arch/sparc64/lib/memscan.S
+++ b/arch/sparc64/lib/memscan.S
@@ -52,43 +52,43 @@ check_bytes:
andcc %o5, 0xff, %g0
add %o0, -5, %g2
ba,pt %xcc, 3f
- srlx %o5, 32, %g5
+ srlx %o5, 32, %g7
-2: srlx %o5, 8, %g5
+2: srlx %o5, 8, %g7
be,pn %icc, 1f
add %o0, -8, %g2
- andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+ andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
- andcc %g5, 0xff, %g0
+ andcc %g7, 0xff, %g0
- srlx %g5, 8, %g5
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
- andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+ andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
andcc %g3, %o3, %g0
be,a,pn %icc, 2f
mov %o0, %g2
-3: andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+3: andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
- andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+ andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
- andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+ andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
- andcc %g5, 0xff, %g0
- srlx %g5, 8, %g5
+ andcc %g7, 0xff, %g0
+ srlx %g7, 8, %g7
be,pn %icc, 1f
inc %g2
diff --git a/arch/sparc64/lib/strlen.S b/arch/sparc64/lib/strlen.S
index 066ec1ed7d0dd..e9ba1920d818e 100644
--- a/arch/sparc64/lib/strlen.S
+++ b/arch/sparc64/lib/strlen.S
@@ -48,16 +48,16 @@ strlen:
add %o0, 4, %o0
/* Check every byte. */
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
+ srl %o5, 24, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o0, -4, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
+ srl %o5, 16, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
+ srl %o5, 8, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o4, 1, %o4
andcc %o5, 0xff, %g0
diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S
index 4af69a0adfbcc..9ed54ba14fc63 100644
--- a/arch/sparc64/lib/strlen_user.S
+++ b/arch/sparc64/lib/strlen_user.S
@@ -54,16 +54,16 @@ __strnlen_user:
ba,a,pt %xcc, 1f
/* Check every byte. */
-82: srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
+82: srl %o5, 24, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o0, -3, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
+ srl %o5, 16, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
+ srl %o5, 8, %g7
+ andcc %g7, 0xff, %g0
be,pn %icc, 1f
add %o4, 1, %o4
andcc %o5, 0xff, %g0
diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S
index f748fd6bbc389..4cd5d2be1ae1f 100644
--- a/arch/sparc64/lib/xor.S
+++ b/arch/sparc64/lib/xor.S
@@ -248,7 +248,7 @@ xor_vis_4:
.globl xor_vis_5
.type xor_vis_5,#function
xor_vis_5:
- mov %o5, %g5
+ save %sp, -192, %sp
rd %fprs, %o5
andcc %o5, FPRS_FEF|FPRS_DU, %g0
be,pt %icc, 0f
@@ -256,61 +256,60 @@ xor_vis_5:
jmpl %g1 + %lo(VISenter), %g7
add %g7, 8, %g7
0: wr %g0, FPRS_FEF, %fprs
- mov %g5, %o5
rd %asi, %g1
wr %g0, ASI_BLK_P, %asi
membar #LoadStore|#StoreLoad|#StoreStore
- sub %o0, 64, %o0
- ldda [%o1] %asi, %f0
- ldda [%o2] %asi, %f16
+ sub %i0, 64, %i0
+ ldda [%i1] %asi, %f0
+ ldda [%i2] %asi, %f16
-5: ldda [%o3] %asi, %f32
+5: ldda [%i3] %asi, %f32
fxor %f0, %f16, %f48
fxor %f2, %f18, %f50
- add %o1, 64, %o1
+ add %i1, 64, %i1
fxor %f4, %f20, %f52
fxor %f6, %f22, %f54
- add %o2, 64, %o2
+ add %i2, 64, %i2
fxor %f8, %f24, %f56
fxor %f10, %f26, %f58
fxor %f12, %f28, %f60
fxor %f14, %f30, %f62
- ldda [%o4] %asi, %f16
+ ldda [%i4] %asi, %f16
fxor %f48, %f32, %f48
fxor %f50, %f34, %f50
fxor %f52, %f36, %f52
fxor %f54, %f38, %f54
- add %o3, 64, %o3
+ add %i3, 64, %i3
fxor %f56, %f40, %f56
fxor %f58, %f42, %f58
fxor %f60, %f44, %f60
fxor %f62, %f46, %f62
- ldda [%o5] %asi, %f32
+ ldda [%i5] %asi, %f32
fxor %f48, %f16, %f48
fxor %f50, %f18, %f50
- add %o4, 64, %o4
+ add %i4, 64, %i4
fxor %f52, %f20, %f52
fxor %f54, %f22, %f54
- add %o5, 64, %o5
+ add %i5, 64, %i5
fxor %f56, %f24, %f56
fxor %f58, %f26, %f58
fxor %f60, %f28, %f60
fxor %f62, %f30, %f62
- ldda [%o1] %asi, %f0
+ ldda [%i1] %asi, %f0
fxor %f48, %f32, %f48
fxor %f50, %f34, %f50
fxor %f52, %f36, %f52
fxor %f54, %f38, %f54
fxor %f56, %f40, %f56
fxor %f58, %f42, %f58
- subcc %o0, 64, %o0
+ subcc %i0, 64, %i0
fxor %f60, %f44, %f60
fxor %f62, %f46, %f62
- stda %f48, [%o1 - 64] %asi
+ stda %f48, [%i1 - 64] %asi
bne,pt %xcc, 5b
- ldda [%o2] %asi, %f16
+ ldda [%i2] %asi, %f16
- ldda [%o3] %asi, %f32
+ ldda [%i3] %asi, %f32
fxor %f0, %f16, %f48
fxor %f2, %f18, %f50
fxor %f4, %f20, %f52
@@ -319,7 +318,7 @@ xor_vis_5:
fxor %f10, %f26, %f58
fxor %f12, %f28, %f60
fxor %f14, %f30, %f62
- ldda [%o4] %asi, %f16
+ ldda [%i4] %asi, %f16
fxor %f48, %f32, %f48
fxor %f50, %f34, %f50
fxor %f52, %f36, %f52
@@ -328,7 +327,7 @@ xor_vis_5:
fxor %f58, %f42, %f58
fxor %f60, %f44, %f60
fxor %f62, %f46, %f62
- ldda [%o5] %asi, %f32
+ ldda [%i5] %asi, %f32
fxor %f48, %f16, %f48
fxor %f50, %f18, %f50
fxor %f52, %f20, %f52
@@ -346,9 +345,10 @@ xor_vis_5:
fxor %f58, %f42, %f58
fxor %f60, %f44, %f60
fxor %f62, %f46, %f62
- stda %f48, [%o1] %asi
+ stda %f48, [%i1] %asi
membar #Sync|#StoreStore|#StoreLoad
wr %g1, %g0, %asi
- retl
- wr %g0, 0, %fprs
+ wr %g0, 0, %fprs
+ ret
+ restore
.size xor_vis_5, .-xor_vis_5
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 4c97fbe0bb7b7..89022ccaa75bb 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -132,15 +132,15 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
mask = (mask << 24) | (1UL << PG_dcache_dirty);
__asm__ __volatile__("1:\n\t"
"ldx [%2], %%g7\n\t"
- "and %%g7, %1, %%g5\n\t"
- "or %%g5, %0, %%g5\n\t"
- "casx [%2], %%g7, %%g5\n\t"
- "cmp %%g7, %%g5\n\t"
+ "and %%g7, %1, %%g1\n\t"
+ "or %%g1, %0, %%g1\n\t"
+ "casx [%2], %%g7, %%g1\n\t"
+ "cmp %%g7, %%g1\n\t"
"bne,pn %%xcc, 1b\n\t"
" membar #StoreLoad | #StoreStore"
: /* no outputs */
: "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
- : "g5", "g7");
+ : "g1", "g7");
}
static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
@@ -150,20 +150,20 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
__asm__ __volatile__("! test_and_clear_dcache_dirty\n"
"1:\n\t"
"ldx [%2], %%g7\n\t"
- "srlx %%g7, 24, %%g5\n\t"
- "and %%g5, %3, %%g5\n\t"
- "cmp %%g5, %0\n\t"
+ "srlx %%g7, 24, %%g1\n\t"
+ "and %%g1, %3, %%g1\n\t"
+ "cmp %%g1, %0\n\t"
"bne,pn %%icc, 2f\n\t"
- " andn %%g7, %1, %%g5\n\t"
- "casx [%2], %%g7, %%g5\n\t"
- "cmp %%g7, %%g5\n\t"
+ " andn %%g7, %1, %%g1\n\t"
+ "casx [%2], %%g7, %%g1\n\t"
+ "cmp %%g7, %%g1\n\t"
"bne,pn %%xcc, 1b\n\t"
" membar #StoreLoad | #StoreStore\n"
"2:"
: /* no outputs */
: "r" (cpu), "r" (mask), "r" (&page->flags),
"i" (NR_CPUS - 1UL)
- : "g5", "g7");
+ : "g1", "g7");
}
extern void __update_mmu_cache(unsigned long mmu_context_hw, unsigned long address, pte_t pte, int code);
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index 3077046a268d5..3d1af3fc10a63 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -226,37 +226,41 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
} \
} while(0)
-static __inline__ unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
+static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
{
+ unsigned long tmp1, tmp2;
+
__asm__ __volatile__(
" membar #StoreLoad | #LoadLoad\n"
-" mov %0, %%g5\n"
-"1: lduw [%2], %%g7\n"
-" cas [%2], %%g7, %0\n"
-" cmp %%g7, %0\n"
+" mov %0, %1\n"
+"1: lduw [%4], %2\n"
+" cas [%4], %2, %0\n"
+" cmp %2, %0\n"
" bne,a,pn %%icc, 1b\n"
-" mov %%g5, %0\n"
+" mov %1, %0\n"
" membar #StoreLoad | #StoreStore\n"
- : "=&r" (val)
+ : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
: "0" (val), "r" (m)
- : "g5", "g7", "cc", "memory");
+ : "cc", "memory");
return val;
}
-static __inline__ unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
+static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
{
+ unsigned long tmp1, tmp2;
+
__asm__ __volatile__(
" membar #StoreLoad | #LoadLoad\n"
-" mov %0, %%g5\n"
-"1: ldx [%2], %%g7\n"
-" casx [%2], %%g7, %0\n"
-" cmp %%g7, %0\n"
+" mov %0, %1\n"
+"1: ldx [%4], %2\n"
+" casx [%4], %2, %0\n"
+" cmp %2, %0\n"
" bne,a,pn %%xcc, 1b\n"
-" mov %%g5, %0\n"
+" mov %1, %0\n"
" membar #StoreLoad | #StoreStore\n"
- : "=&r" (val)
+ : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
: "0" (val), "r" (m)
- : "g5", "g7", "cc", "memory");
+ : "cc", "memory");
return val;
}