author     David S. Miller <davem@nuts.davemloft.net>   2004-08-08 23:45:00 -0700
committer  David S. Miller <davem@nuts.davemloft.net>   2004-08-08 23:45:00 -0700
commit     6d5a0e9141324dc56517ccad55f71f5c478fd12e (patch)
tree       16421d56fd798f26f681efa5257b831f5fb7267a /arch
parent     ed4076ee1e980aacb0038f795e1715b4bddefaee (diff)
[SPARC64]: Fix up copy_page just like clear_page.
- Do not save/restore existing TLB entries, that is expensive,
  complicated, and does not really help performance much at all.

- Only one unrolled loop for cheetah copy code.

- For spitfire, use %asi register for block commit handling so we
  do not need two copies of the code.

Signed-off-by: David S. Miller <davem@redhat.com>
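
[Editor's note] The third bullet is the key refactor in the diff below. The old blockops.S (deleted further down) carried two nearly identical Spitfire copy loops, one storing with ASI_BLK_P and one with ASI_BLK_COMMIT_P for tasks with _TIF_BLKCOMMIT set. The new copy_page.S selects the ASI once, writes it into the %asi register, and runs a single loop whose block stores read "stda %f48, [%o0] %asi". A minimal C sketch of that shape, assuming nothing beyond the commit itself; the enum and both helpers are hypothetical stand-ins, not kernel APIs:

	/* Illustrative C-only sketch of the %asi refactor; store_block()
	 * and copy_page_sketch() are hypothetical, not kernel code. */
	#include <stddef.h>
	#include <string.h>

	enum store_mode { STORE_PLAIN, STORE_COMMIT }; /* ~ ASI_BLK_P vs ASI_BLK_COMMIT_P */

	static void store_block(void *dst, const void *src, enum store_mode mode)
	{
		/* The real code issues one 64-byte block store whose ASI comes
		 * from the %asi register; a plain memcpy stands in here. */
		(void)mode;
		memcpy(dst, src, 64);
	}

	/* One loop parameterized by a mode chosen up front, instead of the
	 * two duplicated loops the old blockops.S needed. */
	static void copy_page_sketch(void *dst, const void *src,
				     size_t page_size, enum store_mode mode)
	{
		for (size_t off = 0; off < page_size; off += 64)
			store_block((char *)dst + off, (const char *)src + off, mode);
	}

The selection itself appears at the 9: label of the new file, where rd %asi saves the caller's ASI and wr %g0, ASI_BLK_COMMIT_P, %asi switches it for _TIF_BLKCOMMIT tasks.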
Diffstat (limited to 'arch')
-rw-r--r--  arch/sparc64/kernel/head.S   |   2
-rw-r--r--  arch/sparc64/lib/Makefile    |   2
-rw-r--r--  arch/sparc64/lib/VIScopy.S   |   1
-rw-r--r--  arch/sparc64/lib/blockops.S  | 353
-rw-r--r--  arch/sparc64/lib/copy_page.S | 239
5 files changed, 240 insertions, 357 deletions
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 14de65beec031b..01c8869a7f6f45 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -522,8 +522,6 @@ cheetah_tlb_fixup:
/* Patch copy/page operations to cheetah optimized versions. */
call cheetah_patch_copyops
nop
- call cheetah_patch_pgcopyops
- nop
call cheetah_patch_cachetlbops
nop
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 2c75d533a87178..2da5af9a1f3d47 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -5,7 +5,7 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
-lib-y := PeeCeeI.o blockops.o clear_page.o strlen.o strncmp.o \
+lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \
diff --git a/arch/sparc64/lib/VIScopy.S b/arch/sparc64/lib/VIScopy.S
index b3e0fa5dca0e45..79052cdf8ab374 100644
--- a/arch/sparc64/lib/VIScopy.S
+++ b/arch/sparc64/lib/VIScopy.S
@@ -402,7 +402,6 @@ cheetah_patch_copyops:
ULTRA3_PCACHE_DO_NOP(U3memcpy)
ULTRA3_PCACHE_DO_NOP(U3copy_from_user)
ULTRA3_PCACHE_DO_NOP(U3copy_to_user)
- ULTRA3_PCACHE_DO_NOP(cheetah_copy_user_page)
#if 0
pcache_disabled:
#endif
diff --git a/arch/sparc64/lib/blockops.S b/arch/sparc64/lib/blockops.S
deleted file mode 100644
index e670e393619a63..00000000000000
--- a/arch/sparc64/lib/blockops.S
+++ /dev/null
@@ -1,353 +0,0 @@
-/* $Id: blockops.S,v 1.42 2002/02/09 19:49:30 davem Exp $
- * blockops.S: UltraSparc block zero optimized routines.
- *
- * Copyright (C) 1996, 1998, 1999, 2000 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include "VIS.h"
-#include <asm/visasm.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-#include <asm/dcu.h>
-#include <asm/spitfire.h>
-#include <asm/pgtable.h>
-
-#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
- fmovd %reg0, %f48; fmovd %reg1, %f50; \
- fmovd %reg2, %f52; fmovd %reg3, %f54; \
- fmovd %reg4, %f56; fmovd %reg5, %f58; \
- fmovd %reg6, %f60; fmovd %reg7, %f62;
-
-#define DCACHE_SIZE (PAGE_SIZE * 2)
-#define TLBTEMP_ENT1 (60 << 3)
-#define TLBTEMP_ENT2 (61 << 3)
-#define TLBTEMP_ENTSZ (1 << 3)
-
-#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
-#define PAGE_SIZE_REM 0x80
-#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
-#define PAGE_SIZE_REM 0x100
-#else
-#error Wrong PAGE_SHIFT specified
-#endif
-
- .text
-
- .align 32
- .globl copy_user_page
- .type copy_user_page,@function
-copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
- VISEntry
- sethi %hi(PAGE_SIZE), %g3
- sethi %uhi(PAGE_OFFSET), %g2
- sllx %g2, 32, %g2
- sub %o0, %g2, %g1
- and %o2, %g3, %o0
- sethi %hi(TLBTEMP_BASE), %o3
- sethi %uhi(_PAGE_VALID | _PAGE_SZBITS), %g3
- sub %o1, %g2, %g2
- sllx %g3, 32, %g3
- mov TLB_TAG_ACCESS, %o2
- or %g3, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W), %g3
- sethi %hi(DCACHE_SIZE), %o1
- or %g1, %g3, %g1
- or %g2, %g3, %g2
- add %o0, %o3, %o0
- add %o0, %o1, %o1
-#define FIX_INSN_1 0x96102060 /* mov (12 << 3), %o3 */
-cheetah_patch_1:
- mov TLBTEMP_ENT1, %o3
- rdpr %pstate, %g3
- wrpr %g3, PSTATE_IE, %pstate
-
- /* Do this now, before loading the fixed TLB entries for copying,
- * so we do not risk a multiple TLB match condition later when
- * restoring those entries.
- */
- ldx [%g6 + TI_FLAGS], %g3
-
- /* Spitfire Errata #32 workaround */
- mov PRIMARY_CONTEXT, %o4
- stxa %g0, [%o4] ASI_DMMU
- membar #Sync
-
- ldxa [%o3] ASI_DTLB_TAG_READ, %o4
-
- /* Spitfire Errata #32 workaround */
- mov PRIMARY_CONTEXT, %o5
- stxa %g0, [%o5] ASI_DMMU
- membar #Sync
-
- ldxa [%o3] ASI_DTLB_DATA_ACCESS, %g0
- ldxa [%o3] ASI_DTLB_DATA_ACCESS, %o5
- stxa %o0, [%o2] ASI_DMMU
- stxa %g1, [%o3] ASI_DTLB_DATA_ACCESS
- membar #Sync
- add %o3, (TLBTEMP_ENTSZ), %o3
-
- /* Spitfire Errata #32 workaround */
- mov PRIMARY_CONTEXT, %g5
- stxa %g0, [%g5] ASI_DMMU
- membar #Sync
-
- ldxa [%o3] ASI_DTLB_TAG_READ, %g5
-
- /* Spitfire Errata #32 workaround */
- mov PRIMARY_CONTEXT, %g7
- stxa %g0, [%g7] ASI_DMMU
- membar #Sync
-
- ldxa [%o3] ASI_DTLB_DATA_ACCESS, %g0
- ldxa [%o3] ASI_DTLB_DATA_ACCESS, %g7
- stxa %o1, [%o2] ASI_DMMU
- stxa %g2, [%o3] ASI_DTLB_DATA_ACCESS
- membar #Sync
-
- andcc %g3, _TIF_BLKCOMMIT, %g0
- bne,pn %xcc, copy_page_using_blkcommit
- nop
-
- BRANCH_IF_ANY_CHEETAH(g3,o2,cheetah_copy_user_page)
- ba,pt %xcc, spitfire_copy_user_page
- nop
-
-cheetah_copy_user_page:
- .globl cheetah_copy_user_page_nop_1_6
-cheetah_copy_user_page_nop_1_6:
- ldxa [%g0] ASI_DCU_CONTROL_REG, %g3
- sethi %uhi(DCU_PE), %o2
- sllx %o2, 32, %o2
- or %g3, %o2, %o2
- stxa %o2, [%g0] ASI_DCU_CONTROL_REG ! Enable P-cache
- membar #Sync
-
- sethi %hi((PAGE_SIZE/64)-7), %o2 ! A0 Group
- prefetch [%o1 + 0x000], #one_read ! MS
- or %o2, %lo((PAGE_SIZE/64)-7), %o2 ! A1 Group
- prefetch [%o1 + 0x040], #one_read ! MS
- prefetch [%o1 + 0x080], #one_read ! MS Group
- prefetch [%o1 + 0x0c0], #one_read ! MS Group
- ldd [%o1 + 0x000], %f0 ! MS Group
- prefetch [%o1 + 0x100], #one_read ! MS Group
- ldd [%o1 + 0x008], %f2 ! AX
- prefetch [%o1 + 0x140], #one_read ! MS Group
- ldd [%o1 + 0x010], %f4 ! AX
- prefetch [%o1 + 0x180], #one_read ! MS Group
- fmovd %f0, %f32 ! FGA Group
- ldd [%o1 + 0x018], %f6 ! AX
- fmovd %f2, %f34 ! FGA Group
- ldd [%o1 + 0x020], %f8 ! MS
- fmovd %f4, %f36 ! FGA Group
- ldd [%o1 + 0x028], %f10 ! AX
- membar #StoreStore ! MS
- fmovd %f6, %f38 ! FGA Group
- ldd [%o1 + 0x030], %f12 ! MS
- fmovd %f8, %f40 ! FGA Group
- ldd [%o1 + 0x038], %f14 ! AX
- fmovd %f10, %f42 ! FGA Group
- ldd [%o1 + 0x040], %f16 ! MS
-1: ldd [%o1 + 0x048], %f2 ! AX (Group)
- fmovd %f12, %f44 ! FGA
- ldd [%o1 + 0x050], %f4 ! MS
- fmovd %f14, %f46 ! FGA Group
- stda %f32, [%o0] ASI_BLK_P ! MS
- ldd [%o1 + 0x058], %f6 ! AX
- fmovd %f16, %f32 ! FGA Group (8-cycle stall)
- ldd [%o1 + 0x060], %f8 ! MS
- fmovd %f2, %f34 ! FGA Group
- ldd [%o1 + 0x068], %f10 ! AX
- fmovd %f4, %f36 ! FGA Group
- ldd [%o1 + 0x070], %f12 ! MS
- fmovd %f6, %f38 ! FGA Group
- ldd [%o1 + 0x078], %f14 ! AX
- fmovd %f8, %f40 ! FGA Group
- ldd [%o1 + 0x080], %f16 ! AX
- prefetch [%o1 + 0x180], #one_read ! MS
- fmovd %f10, %f42 ! FGA Group
- subcc %o2, 1, %o2 ! A0
- add %o0, 0x40, %o0 ! A1
- bne,pt %xcc, 1b ! BR
- add %o1, 0x40, %o1 ! A0 Group
-
- mov 5, %o2 ! A0 Group
-1: ldd [%o1 + 0x048], %f2 ! AX
- fmovd %f12, %f44 ! FGA
- ldd [%o1 + 0x050], %f4 ! MS
- fmovd %f14, %f46 ! FGA Group
- stda %f32, [%o0] ASI_BLK_P ! MS
- ldd [%o1 + 0x058], %f6 ! AX
- fmovd %f16, %f32 ! FGA Group (8-cycle stall)
- ldd [%o1 + 0x060], %f8 ! MS
- fmovd %f2, %f34 ! FGA Group
- ldd [%o1 + 0x068], %f10 ! AX
- fmovd %f4, %f36 ! FGA Group
- ldd [%o1 + 0x070], %f12 ! MS
- fmovd %f6, %f38 ! FGA Group
- ldd [%o1 + 0x078], %f14 ! AX
- fmovd %f8, %f40 ! FGA Group
- ldd [%o1 + 0x080], %f16 ! MS
- fmovd %f10, %f42 ! FGA Group
- subcc %o2, 1, %o2 ! A0
- add %o0, 0x40, %o0 ! A1
- bne,pt %xcc, 1b ! BR
- add %o1, 0x40, %o1 ! A0 Group
-
- ldd [%o1 + 0x048], %f2 ! AX
- fmovd %f12, %f44 ! FGA
- ldd [%o1 + 0x050], %f4 ! MS
- fmovd %f14, %f46 ! FGA Group
- stda %f32, [%o0] ASI_BLK_P ! MS
- ldd [%o1 + 0x058], %f6 ! AX
- fmovd %f16, %f32 ! FGA Group (8-cycle stall)
- ldd [%o1 + 0x060], %f8 ! MS
- fmovd %f2, %f34 ! FGA Group
- ldd [%o1 + 0x068], %f10 ! AX
- fmovd %f4, %f36 ! FGA Group
- ldd [%o1 + 0x070], %f12 ! MS
- fmovd %f6, %f38 ! FGA Group
- add %o0, 0x40, %o0 ! A0
- ldd [%o1 + 0x078], %f14 ! AX
- fmovd %f8, %f40 ! FGA Group
- fmovd %f10, %f42 ! FGA Group
- fmovd %f12, %f44 ! FGA Group
- fmovd %f14, %f46 ! FGA Group
- stda %f32, [%o0] ASI_BLK_P ! MS
- .globl cheetah_copy_user_page_nop_2_3
-cheetah_copy_user_page_nop_2_3:
- mov PRIMARY_CONTEXT, %o2
- stxa %g0, [%o2] ASI_DMMU ! Flush P-cache
- stxa %g3, [%g0] ASI_DCU_CONTROL_REG ! Disable P-cache
- ba,a,pt %xcc, copy_user_page_continue
-
-spitfire_copy_user_page:
- ldda [%o1] ASI_BLK_P, %f0
- add %o1, 0x40, %o1
- ldda [%o1] ASI_BLK_P, %f16
- add %o1, 0x40, %o1
- sethi %hi(PAGE_SIZE), %o2
-1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
- ldda [%o1] ASI_BLK_P, %f32
- stda %f48, [%o0] ASI_BLK_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
- ldda [%o1] ASI_BLK_P, %f0
- stda %f48, [%o0] ASI_BLK_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
- ldda [%o1] ASI_BLK_P, %f16
- stda %f48, [%o0] ASI_BLK_P
- sub %o2, 0x40, %o2
- add %o1, 0x40, %o1
- cmp %o2, PAGE_SIZE_REM
- bne,pt %xcc, 1b
- add %o0, 0x40, %o0
-#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
- TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
- ldda [%o1] ASI_BLK_P, %f32
- stda %f48, [%o0] ASI_BLK_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
- ldda [%o1] ASI_BLK_P, %f0
- stda %f48, [%o0] ASI_BLK_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- membar #Sync
- stda %f32, [%o0] ASI_BLK_P
- add %o0, 0x40, %o0
- stda %f0, [%o0] ASI_BLK_P
-#else
- membar #Sync
- stda %f0, [%o0] ASI_BLK_P
- add %o0, 0x40, %o0
- stda %f16, [%o0] ASI_BLK_P
-#endif
-copy_user_page_continue:
- membar #Sync
- VISExit
-
- mov TLB_TAG_ACCESS, %o2
- stxa %g5, [%o2] ASI_DMMU
- stxa %g7, [%o3] ASI_DTLB_DATA_ACCESS
- membar #Sync
- sub %o3, (TLBTEMP_ENTSZ), %o3
- stxa %o4, [%o2] ASI_DMMU
- stxa %o5, [%o3] ASI_DTLB_DATA_ACCESS
- membar #Sync
- rdpr %pstate, %g3
- jmpl %o7 + 0x8, %g0
- wrpr %g3, PSTATE_IE, %pstate
-
-copy_page_using_blkcommit:
- membar #LoadStore | #StoreStore | #StoreLoad
- ldda [%o1] ASI_BLK_P, %f0
- add %o1, 0x40, %o1
- ldda [%o1] ASI_BLK_P, %f16
- add %o1, 0x40, %o1
- sethi %hi(PAGE_SIZE), %o2
-1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
- ldda [%o1] ASI_BLK_P, %f32
- stda %f48, [%o0] ASI_BLK_COMMIT_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
- ldda [%o1] ASI_BLK_P, %f0
- stda %f48, [%o0] ASI_BLK_COMMIT_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
- ldda [%o1] ASI_BLK_P, %f16
- stda %f48, [%o0] ASI_BLK_COMMIT_P
- sub %o2, 0x40, %o2
- add %o1, 0x40, %o1
- cmp %o2, PAGE_SIZE_REM
- bne,pt %xcc, 1b
- add %o0, 0x40, %o0
-#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
- TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
- ldda [%o1] ASI_BLK_P, %f32
- stda %f48, [%o0] ASI_BLK_COMMIT_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
- ldda [%o1] ASI_BLK_P, %f0
- stda %f48, [%o0] ASI_BLK_COMMIT_P
- add %o1, 0x40, %o1
- sub %o2, 0x40, %o2
- add %o0, 0x40, %o0
- membar #Sync
- stda %f32, [%o0] ASI_BLK_COMMIT_P
- add %o0, 0x40, %o0
- ba,pt %xcc, copy_user_page_continue
- stda %f0, [%o0] ASI_BLK_COMMIT_P
-#else
- membar #Sync
- stda %f0, [%o0] ASI_BLK_COMMIT_P
- add %o0, 0x40, %o0
- ba,pt %xcc, copy_user_page_continue
- stda %f16, [%o0] ASI_BLK_COMMIT_P
-#endif
-
- .globl cheetah_patch_pgcopyops
-cheetah_patch_pgcopyops:
- sethi %hi(FIX_INSN_1), %g1
- or %g1, %lo(FIX_INSN_1), %g1
- sethi %hi(cheetah_patch_1), %g2
- or %g2, %lo(cheetah_patch_1), %g2
- stw %g1, [%g2]
- flush %g2
- retl
- nop
-
-#undef FIX_INSN_1
-#undef PAGE_SIZE_REM
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S
new file mode 100644
index 00000000000000..69af6a7f074236
--- /dev/null
+++ b/arch/sparc64/lib/copy_page.S
@@ -0,0 +1,239 @@
+/* copy_page.S: UltraSparc optimized copy page.
+ *
+ * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <asm/visasm.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+
+ /* What we used to do was lock a pair of TLB entries into
+ * specific TLB slots, copy the page with interrupts disabled,
+ * then restore the original TLB entries. This was great for
+ * disturbing the TLB as little as possible, but it meant
+ * we had to keep interrupts disabled for a long time.
+ *
+ * Now, we simply use the normal TLB loading mechanism,
+ * and this makes the cpu choose slots all by itself.
+ * Then we do a normal TLB flush on exit. We need only
+ * disable preemption during the copy.
+ */
+
+#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS)
+#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W)
+#define DCACHE_SIZE (PAGE_SIZE * 2)
+
+#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
+#define PAGE_SIZE_REM 0x80
+#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+#define PAGE_SIZE_REM 0x100
+#else
+#error Wrong PAGE_SHIFT specified
+#endif
+
+#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
+ fmovd %reg0, %f48; fmovd %reg1, %f50; \
+ fmovd %reg2, %f52; fmovd %reg3, %f54; \
+ fmovd %reg4, %f56; fmovd %reg5, %f58; \
+ fmovd %reg6, %f60; fmovd %reg7, %f62;
+
+ .text
+
+ .align 32
+ .globl copy_user_page
+copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
+ lduw [%g6 + TI_PRE_COUNT], %o4
+ sethi %uhi(PAGE_OFFSET), %g2
+ sethi %hi(PAGE_SIZE), %o3
+
+ sllx %g2, 32, %g2
+ sethi %uhi(TTE_BITS_TOP), %g3
+
+ sllx %g3, 32, %g3
+ sub %o0, %g2, %g1 ! dest paddr
+
+ sub %o1, %g2, %g2 ! src paddr
+ or %g3, TTE_BITS_BOTTOM, %g3
+
+ and %o2, %o3, %o0 ! vaddr D-cache alias bit
+ or %g1, %g3, %g1 ! dest TTE data
+
+ or %g2, %g3, %g2 ! src TTE data
+ sethi %hi(TLBTEMP_BASE), %o3
+
+ sethi %hi(DCACHE_SIZE), %o1
+ add %o0, %o3, %o0 ! dest TTE vaddr
+
+ add %o4, 1, %o2
+ add %o0, %o1, %o1 ! src TTE vaddr
+
+ /* Disable preemption. */
+ mov TLB_TAG_ACCESS, %g3
+ stw %o2, [%g6 + TI_PRE_COUNT]
+
+ /* Load TLB entries. */
+ rdpr %pstate, %o2
+ wrpr %o2, PSTATE_IE, %pstate
+ stxa %o0, [%g3] ASI_DMMU
+ stxa %g1, [%g0] ASI_DTLB_DATA_IN
+ membar #Sync
+ stxa %o1, [%g3] ASI_DMMU
+ stxa %g2, [%g0] ASI_DTLB_DATA_IN
+ membar #Sync
+ wrpr %o2, 0x0, %pstate
+
+ BRANCH_IF_ANY_CHEETAH(g3,o2,1f)
+ ba,pt %xcc, 9f
+ nop
+
+1:
+ VISEntryHalf
+ membar #StoreLoad | #StoreStore | #LoadStore
+ sethi %hi((PAGE_SIZE/64)-2), %o2
+ mov %o0, %g1
+ prefetch [%o1 + 0x000], #one_read
+ or %o2, %lo((PAGE_SIZE/64)-2), %o2
+ prefetch [%o1 + 0x040], #one_read
+ prefetch [%o1 + 0x080], #one_read
+ prefetch [%o1 + 0x0c0], #one_read
+ ldd [%o1 + 0x000], %f0
+ prefetch [%o1 + 0x100], #one_read
+ ldd [%o1 + 0x008], %f2
+ prefetch [%o1 + 0x140], #one_read
+ ldd [%o1 + 0x010], %f4
+ prefetch [%o1 + 0x180], #one_read
+ fmovd %f0, %f16
+ ldd [%o1 + 0x018], %f6
+ fmovd %f2, %f18
+ ldd [%o1 + 0x020], %f8
+ fmovd %f4, %f20
+ ldd [%o1 + 0x028], %f10
+ fmovd %f6, %f22
+ ldd [%o1 + 0x030], %f12
+ fmovd %f8, %f24
+ ldd [%o1 + 0x038], %f14
+ fmovd %f10, %f26
+ ldd [%o1 + 0x040], %f0
+1: ldd [%o1 + 0x048], %f2
+ fmovd %f12, %f28
+ ldd [%o1 + 0x050], %f4
+ fmovd %f14, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ ldd [%o1 + 0x058], %f6
+ fmovd %f0, %f16
+ ldd [%o1 + 0x060], %f8
+ fmovd %f2, %f18
+ ldd [%o1 + 0x068], %f10
+ fmovd %f4, %f20
+ ldd [%o1 + 0x070], %f12
+ fmovd %f6, %f22
+ ldd [%o1 + 0x078], %f14
+ fmovd %f8, %f24
+ ldd [%o1 + 0x080], %f0
+ prefetch [%o1 + 0x180], #one_read
+ fmovd %f10, %f26
+ subcc %o2, 1, %o2
+ add %o0, 0x40, %o0
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+ ldd [%o1 + 0x048], %f2
+ fmovd %f12, %f28
+ ldd [%o1 + 0x050], %f4
+ fmovd %f14, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ ldd [%o1 + 0x058], %f6
+ fmovd %f0, %f16
+ ldd [%o1 + 0x060], %f8
+ fmovd %f2, %f18
+ ldd [%o1 + 0x068], %f10
+ fmovd %f4, %f20
+ ldd [%o1 + 0x070], %f12
+ fmovd %f6, %f22
+ add %o0, 0x40, %o0
+ ldd [%o1 + 0x078], %f14
+ fmovd %f8, %f24
+ fmovd %f10, %f26
+ fmovd %f12, %f28
+ fmovd %f14, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ membar #Sync
+ VISExitHalf
+ ba,pt %xcc, 5f
+ nop
+
+9:
+ ldx [%g6 + TI_FLAGS], %g3
+ andcc %g3, _TIF_BLKCOMMIT, %g0
+ rd %asi, %g3
+ be,a,pt %icc, 1f
+ wr %g0, ASI_BLK_P, %asi
+ wr %g0, ASI_BLK_COMMIT_P, %asi
+1: VISEntry
+ mov %o0, %g1
+ ldda [%o1] ASI_BLK_P, %f0
+ add %o1, 0x40, %o1
+ ldda [%o1] ASI_BLK_P, %f16
+ add %o1, 0x40, %o1
+ sethi %hi(PAGE_SIZE), %o2
+1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+ ldda [%o1] ASI_BLK_P, %f32
+ stda %f48, [%o0] %asi
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ add %o0, 0x40, %o0
+ TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+ ldda [%o1] ASI_BLK_P, %f0
+ stda %f48, [%o0] %asi
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ add %o0, 0x40, %o0
+ TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
+ ldda [%o1] ASI_BLK_P, %f16
+ stda %f48, [%o0] %asi
+ sub %o2, 0x40, %o2
+ add %o1, 0x40, %o1
+ cmp %o2, PAGE_SIZE_REM
+ bne,pt %xcc, 1b
+ add %o0, 0x40, %o0
+#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+ TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+ ldda [%o1] ASI_BLK_P, %f32
+ stda %f48, [%o0] %asi
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ add %o0, 0x40, %o0
+ TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+ ldda [%o1] ASI_BLK_P, %f0
+ stda %f48, [%o0] %asi
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ add %o0, 0x40, %o0
+ membar #Sync
+ stda %f32, [%o0] %asi
+ add %o0, 0x40, %o0
+ stda %f0, [%o0] %asi
+#else
+ membar #Sync
+ stda %f0, [%o0] %asi
+ add %o0, 0x40, %o0
+ stda %f16, [%o0] %asi
+#endif
+ membar #Sync
+ wr %g3, 0x0, %asi
+ VISExit
+
+5:
+ stxa %g0, [%g1] ASI_DMMU_DEMAP
+ membar #Sync
+
+ sethi %hi(DCACHE_SIZE), %g2
+ stxa %g0, [%g1 + %g2] ASI_DMMU_DEMAP
+ membar #Sync
+
+ retl
+ stw %o4, [%g6 + TI_PRE_COUNT]
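
[Editor's note] The comment block at the top of the new copy_page.S explains the design change in prose. Rendered as a rough C-level sketch of the new copy_user_page control flow, under the same caveat as before: every name below is a hypothetical stand-in for what the assembly does directly, not a real kernel API.

	#include <stdint.h>

	#define PAGE_SIZE_SK   8192UL               /* PAGE_SHIFT == 13 case */
	#define DCACHE_SIZE_SK (PAGE_SIZE_SK * 2)
	static uint8_t tlbtemp_base[DCACHE_SIZE_SK * 2]; /* ~ TLBTEMP_BASE */

	static void preempt_disable_sk(void) {}     /* TI_PRE_COUNT += 1      */
	static void preempt_enable_sk(void)  {}     /* restore TI_PRE_COUNT   */
	static void tlb_load(void *va, uintptr_t tte)   { (void)va; (void)tte; }
	static void tlb_demap(void *va)                 { (void)va; }
	static uintptr_t tte_for(const void *page)      { return (uintptr_t)page; }
	static void copy_one_page(void *d, const void *s) { (void)d; (void)s; }

	void copy_user_page_sketch(void *dst, const void *src, unsigned long vaddr)
	{
		unsigned long alias = vaddr & PAGE_SIZE_SK; /* D-cache alias bit */
		uint8_t *d = tlbtemp_base + alias;          /* dest temp mapping */
		uint8_t *s = d + DCACHE_SIZE_SK;            /* src temp mapping  */

		preempt_disable_sk();

		/* Interrupts are off only for this short window while the two
		 * TTEs go in through ASI_DTLB_DATA_IN; the MMU picks the
		 * slots, so there is nothing to save or restore. */
		tlb_load(d, tte_for(dst));
		tlb_load(s, tte_for(src));

		copy_one_page(d, s);    /* cheetah or spitfire block loop */

		/* Exit path (label 5: above): demap the two temporary
		 * mappings instead of restoring saved TLB entries. */
		tlb_demap(d);
		tlb_demap(s);

		preempt_enable_sk();
	}

The payoff is in that exit path: two ASI_DMMU_DEMAP stores replace the old dance of reading, saving, and rewriting fixed TLB slots 60 and 61 with interrupts disabled across the entire copy.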