author | David S. Miller <davem@nuts.davemloft.net> | 2004-08-08 23:45:00 -0700
---|---|---
committer | David S. Miller <davem@nuts.davemloft.net> | 2004-08-08 23:45:00 -0700
commit | 6d5a0e9141324dc56517ccad55f71f5c478fd12e (patch) |
tree | 16421d56fd798f26f681efa5257b831f5fb7267a /arch |
parent | ed4076ee1e980aacb0038f795e1715b4bddefaee (diff) |
download | history-6d5a0e9141324dc56517ccad55f71f5c478fd12e.tar.gz |
[SPARC64]: Fix up copy_page just like clear_page.
- Do not save/restore existing TLB entries; that is expensive, complicated, and does not really help performance much at all.
- Only one unrolled loop for cheetah copy code.
- For spitfire, use %asi register for block commit handling so we do not need two copies of the code.
Signed-off-by: David S. Miller <davem@redhat.com>
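
The third point is visible in the new copy_page.S spitfire path below: the block-store ASI is chosen once up front and the single copy loop issues its stores through the %asi register, so the plain and block-commit cases share one loop body. A condensed sketch of that selection logic, pulled together from non-contiguous lines of the diff (the loop body is elided and scheduling details are omitted):

	ldx	[%g6 + TI_FLAGS], %g3		! test _TIF_BLKCOMMIT
	andcc	%g3, _TIF_BLKCOMMIT, %g0
	rd	%asi, %g3			! save the caller's %asi
	be,a,pt	%icc, 1f
	 wr	%g0, ASI_BLK_P, %asi		! plain block stores
	wr	%g0, ASI_BLK_COMMIT_P, %asi	! block-commit stores
1:	...
	stda	%f48, [%o0] %asi		! one store loop serves both cases
	...
	wr	%g3, 0x0, %asi			! restore %asi before returning

Previously the block-commit case needed a separate copy of the whole loop (copy_page_using_blkcommit in the deleted blockops.S).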
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sparc64/kernel/head.S | 2
-rw-r--r-- | arch/sparc64/lib/Makefile | 2
-rw-r--r-- | arch/sparc64/lib/VIScopy.S | 1
-rw-r--r-- | arch/sparc64/lib/blockops.S | 353
-rw-r--r-- | arch/sparc64/lib/copy_page.S | 239
5 files changed, 240 insertions, 357 deletions
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 14de65beec031b..01c8869a7f6f45 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -522,8 +522,6 @@ cheetah_tlb_fixup:
 	/* Patch copy/page operations to cheetah optimized versions. */
 	call	cheetah_patch_copyops
 	 nop
-	call	cheetah_patch_pgcopyops
-	 nop
 	call	cheetah_patch_cachetlbops
 	 nop
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 2c75d533a87178..2da5af9a1f3d47 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -5,7 +5,7 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
-lib-y := PeeCeeI.o blockops.o clear_page.o strlen.o strncmp.o \
+lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
 	memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
 	VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
 	VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \
diff --git a/arch/sparc64/lib/VIScopy.S b/arch/sparc64/lib/VIScopy.S
index b3e0fa5dca0e45..79052cdf8ab374 100644
--- a/arch/sparc64/lib/VIScopy.S
+++ b/arch/sparc64/lib/VIScopy.S
@@ -402,7 +402,6 @@ cheetah_patch_copyops:
 	ULTRA3_PCACHE_DO_NOP(U3memcpy)
 	ULTRA3_PCACHE_DO_NOP(U3copy_from_user)
 	ULTRA3_PCACHE_DO_NOP(U3copy_to_user)
-	ULTRA3_PCACHE_DO_NOP(cheetah_copy_user_page)
 #if 0
 pcache_disabled:
 #endif
diff --git a/arch/sparc64/lib/blockops.S b/arch/sparc64/lib/blockops.S
deleted file mode 100644
index e670e393619a63..00000000000000
--- a/arch/sparc64/lib/blockops.S
+++ /dev/null
@@ -1,353 +0,0 @@
-/* $Id: blockops.S,v 1.42 2002/02/09 19:49:30 davem Exp $
- * blockops.S: UltraSparc block zero optimized routines.
- *
- * Copyright (C) 1996, 1998, 1999, 2000 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include "VIS.h"
-#include <asm/visasm.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-#include <asm/dcu.h>
-#include <asm/spitfire.h>
-#include <asm/pgtable.h>
-
-#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
-	fmovd %reg0, %f48; fmovd %reg1, %f50; \
-	fmovd %reg2, %f52; fmovd %reg3, %f54; \
-	fmovd %reg4, %f56; fmovd %reg5, %f58; \
-	fmovd %reg6, %f60; fmovd %reg7, %f62;
-
-#define DCACHE_SIZE	(PAGE_SIZE * 2)
-#define TLBTEMP_ENT1	(60 << 3)
-#define TLBTEMP_ENT2	(61 << 3)
-#define TLBTEMP_ENTSZ	(1 << 3)
-
-#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
-#define PAGE_SIZE_REM	0x80
-#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
-#define PAGE_SIZE_REM	0x100
-#else
-#error Wrong PAGE_SHIFT specified
-#endif
-
-	.text
-
-	.align	32
-	.globl	copy_user_page
-	.type	copy_user_page,@function
-copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
-	VISEntry
-	sethi	%hi(PAGE_SIZE), %g3
-	sethi	%uhi(PAGE_OFFSET), %g2
-	sllx	%g2, 32, %g2
-	sub	%o0, %g2, %g1
-	and	%o2, %g3, %o0
-	sethi	%hi(TLBTEMP_BASE), %o3
-	sethi	%uhi(_PAGE_VALID | _PAGE_SZBITS), %g3
-	sub	%o1, %g2, %g2
-	sllx	%g3, 32, %g3
-	mov	TLB_TAG_ACCESS, %o2
-	or	%g3, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W), %g3
-	sethi	%hi(DCACHE_SIZE), %o1
-	or	%g1, %g3, %g1
-	or	%g2, %g3, %g2
-	add	%o0, %o3, %o0
-	add	%o0, %o1, %o1
-#define FIX_INSN_1	0x96102060 /* mov (12 << 3), %o3 */
-cheetah_patch_1:
-	mov	TLBTEMP_ENT1, %o3
-	rdpr	%pstate, %g3
-	wrpr	%g3, PSTATE_IE, %pstate
-
-	/* Do this now, before loading the fixed TLB entries for copying,
-	 * so we do not risk a multiple TLB match condition later when
-	 * restoring those entries.
-	 */
-	ldx	[%g6 + TI_FLAGS], %g3
-
-	/* Spitfire Errata #32 workaround */
-	mov	PRIMARY_CONTEXT, %o4
-	stxa	%g0, [%o4] ASI_DMMU
-	membar	#Sync
-
-	ldxa	[%o3] ASI_DTLB_TAG_READ, %o4
-
-	/* Spitfire Errata #32 workaround */
-	mov	PRIMARY_CONTEXT, %o5
-	stxa	%g0, [%o5] ASI_DMMU
-	membar	#Sync
-
-	ldxa	[%o3] ASI_DTLB_DATA_ACCESS, %g0
-	ldxa	[%o3] ASI_DTLB_DATA_ACCESS, %o5
-	stxa	%o0, [%o2] ASI_DMMU
-	stxa	%g1, [%o3] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	add	%o3, (TLBTEMP_ENTSZ), %o3
-
-	/* Spitfire Errata #32 workaround */
-	mov	PRIMARY_CONTEXT, %g5
-	stxa	%g0, [%g5] ASI_DMMU
-	membar	#Sync
-
-	ldxa	[%o3] ASI_DTLB_TAG_READ, %g5
-
-	/* Spitfire Errata #32 workaround */
-	mov	PRIMARY_CONTEXT, %g7
-	stxa	%g0, [%g7] ASI_DMMU
-	membar	#Sync
-
-	ldxa	[%o3] ASI_DTLB_DATA_ACCESS, %g0
-	ldxa	[%o3] ASI_DTLB_DATA_ACCESS, %g7
-	stxa	%o1, [%o2] ASI_DMMU
-	stxa	%g2, [%o3] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-
-	andcc	%g3, _TIF_BLKCOMMIT, %g0
-	bne,pn	%xcc, copy_page_using_blkcommit
-	 nop
-
-	BRANCH_IF_ANY_CHEETAH(g3,o2,cheetah_copy_user_page)
-	ba,pt	%xcc, spitfire_copy_user_page
-	 nop
-
-cheetah_copy_user_page:
-	.globl	cheetah_copy_user_page_nop_1_6
-cheetah_copy_user_page_nop_1_6:
-	ldxa	[%g0] ASI_DCU_CONTROL_REG, %g3
-	sethi	%uhi(DCU_PE), %o2
-	sllx	%o2, 32, %o2
-	or	%g3, %o2, %o2
-	stxa	%o2, [%g0] ASI_DCU_CONTROL_REG	! Enable P-cache
-	membar	#Sync
-
-	sethi	%hi((PAGE_SIZE/64)-7), %o2	! A0 Group
-	prefetch	[%o1 + 0x000], #one_read	! MS
-	or	%o2, %lo((PAGE_SIZE/64)-7), %o2	! A1 Group
-	prefetch	[%o1 + 0x040], #one_read	! MS
-	prefetch	[%o1 + 0x080], #one_read	! MS Group
-	prefetch	[%o1 + 0x0c0], #one_read	! MS Group
-	ldd	[%o1 + 0x000], %f0	! MS Group
-	prefetch	[%o1 + 0x100], #one_read	! MS Group
-	ldd	[%o1 + 0x008], %f2	! AX
-	prefetch	[%o1 + 0x140], #one_read	! MS Group
-	ldd	[%o1 + 0x010], %f4	! AX
-	prefetch	[%o1 + 0x180], #one_read	! MS Group
-	fmovd	%f0, %f32	! FGA Group
-	ldd	[%o1 + 0x018], %f6	! AX
-	fmovd	%f2, %f34	! FGA Group
-	ldd	[%o1 + 0x020], %f8	! MS
-	fmovd	%f4, %f36	! FGA Group
-	ldd	[%o1 + 0x028], %f10	! AX
-	membar	#StoreStore	! MS
-	fmovd	%f6, %f38	! FGA Group
-	ldd	[%o1 + 0x030], %f12	! MS
-	fmovd	%f8, %f40	! FGA Group
-	ldd	[%o1 + 0x038], %f14	! AX
-	fmovd	%f10, %f42	! FGA Group
-	ldd	[%o1 + 0x040], %f16	! MS
-1:	ldd	[%o1 + 0x048], %f2	! AX (Group)
-	fmovd	%f12, %f44	! FGA
-	ldd	[%o1 + 0x050], %f4	! MS
-	fmovd	%f14, %f46	! FGA Group
-	stda	%f32, [%o0] ASI_BLK_P	! MS
-	ldd	[%o1 + 0x058], %f6	! AX
-	fmovd	%f16, %f32	! FGA Group (8-cycle stall)
-	ldd	[%o1 + 0x060], %f8	! MS
-	fmovd	%f2, %f34	! FGA Group
-	ldd	[%o1 + 0x068], %f10	! AX
-	fmovd	%f4, %f36	! FGA Group
-	ldd	[%o1 + 0x070], %f12	! MS
-	fmovd	%f6, %f38	! FGA Group
-	ldd	[%o1 + 0x078], %f14	! AX
-	fmovd	%f8, %f40	! FGA Group
-	ldd	[%o1 + 0x080], %f16	! AX
-	prefetch	[%o1 + 0x180], #one_read	! MS
-	fmovd	%f10, %f42	! FGA Group
-	subcc	%o2, 1, %o2	! A0
-	add	%o0, 0x40, %o0	! A1
-	bne,pt	%xcc, 1b	! BR
-	 add	%o1, 0x40, %o1	! A0 Group
-
-	mov	5, %o2	! A0 Group
-1:	ldd	[%o1 + 0x048], %f2	! AX
-	fmovd	%f12, %f44	! FGA
-	ldd	[%o1 + 0x050], %f4	! MS
-	fmovd	%f14, %f46	! FGA Group
-	stda	%f32, [%o0] ASI_BLK_P	! MS
-	ldd	[%o1 + 0x058], %f6	! AX
-	fmovd	%f16, %f32	! FGA Group (8-cycle stall)
-	ldd	[%o1 + 0x060], %f8	! MS
-	fmovd	%f2, %f34	! FGA Group
-	ldd	[%o1 + 0x068], %f10	! AX
-	fmovd	%f4, %f36	! FGA Group
-	ldd	[%o1 + 0x070], %f12	! MS
-	fmovd	%f6, %f38	! FGA Group
-	ldd	[%o1 + 0x078], %f14	! AX
-	fmovd	%f8, %f40	! FGA Group
-	ldd	[%o1 + 0x080], %f16	! MS
-	fmovd	%f10, %f42	! FGA Group
-	subcc	%o2, 1, %o2	! A0
-	add	%o0, 0x40, %o0	! A1
-	bne,pt	%xcc, 1b	! BR
-	 add	%o1, 0x40, %o1	! A0 Group
-
-	ldd	[%o1 + 0x048], %f2	! AX
-	fmovd	%f12, %f44	! FGA
-	ldd	[%o1 + 0x050], %f4	! MS
-	fmovd	%f14, %f46	! FGA Group
-	stda	%f32, [%o0] ASI_BLK_P	! MS
-	ldd	[%o1 + 0x058], %f6	! AX
-	fmovd	%f16, %f32	! FGA Group (8-cycle stall)
-	ldd	[%o1 + 0x060], %f8	! MS
-	fmovd	%f2, %f34	! FGA Group
-	ldd	[%o1 + 0x068], %f10	! AX
-	fmovd	%f4, %f36	! FGA Group
-	ldd	[%o1 + 0x070], %f12	! MS
-	fmovd	%f6, %f38	! FGA Group
-	add	%o0, 0x40, %o0	! A0
-	ldd	[%o1 + 0x078], %f14	! AX
-	fmovd	%f8, %f40	! FGA Group
-	fmovd	%f10, %f42	! FGA Group
-	fmovd	%f12, %f44	! FGA Group
-	fmovd	%f14, %f46	! FGA Group
-	stda	%f32, [%o0] ASI_BLK_P	! MS
-	.globl	cheetah_copy_user_page_nop_2_3
-cheetah_copy_user_page_nop_2_3:
-	mov	PRIMARY_CONTEXT, %o2
-	stxa	%g0, [%o2] ASI_DMMU	! Flush P-cache
-	stxa	%g3, [%g0] ASI_DCU_CONTROL_REG	! Disable P-cache
-	ba,a,pt	%xcc, copy_user_page_continue
-
-spitfire_copy_user_page:
-	ldda	[%o1] ASI_BLK_P, %f0
-	add	%o1, 0x40, %o1
-	ldda	[%o1] ASI_BLK_P, %f16
-	add	%o1, 0x40, %o1
-	sethi	%hi(PAGE_SIZE), %o2
-1:	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
-	ldda	[%o1] ASI_BLK_P, %f32
-	stda	%f48, [%o0] ASI_BLK_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
-	ldda	[%o1] ASI_BLK_P, %f0
-	stda	%f48, [%o0] ASI_BLK_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
-	ldda	[%o1] ASI_BLK_P, %f16
-	stda	%f48, [%o0] ASI_BLK_P
-	sub	%o2, 0x40, %o2
-	add	%o1, 0x40, %o1
-	cmp	%o2, PAGE_SIZE_REM
-	bne,pt	%xcc, 1b
-	 add	%o0, 0x40, %o0
-#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
-	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
-	ldda	[%o1] ASI_BLK_P, %f32
-	stda	%f48, [%o0] ASI_BLK_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
-	ldda	[%o1] ASI_BLK_P, %f0
-	stda	%f48, [%o0] ASI_BLK_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	membar	#Sync
-	stda	%f32, [%o0] ASI_BLK_P
-	add	%o0, 0x40, %o0
-	stda	%f0, [%o0] ASI_BLK_P
-#else
-	membar	#Sync
-	stda	%f0, [%o0] ASI_BLK_P
-	add	%o0, 0x40, %o0
-	stda	%f16, [%o0] ASI_BLK_P
-#endif
-copy_user_page_continue:
-	membar	#Sync
-	VISExit
-
-	mov	TLB_TAG_ACCESS, %o2
-	stxa	%g5, [%o2] ASI_DMMU
-	stxa	%g7, [%o3] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	sub	%o3, (TLBTEMP_ENTSZ), %o3
-	stxa	%o4, [%o2] ASI_DMMU
-	stxa	%o5, [%o3] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	rdpr	%pstate, %g3
-	jmpl	%o7 + 0x8, %g0
-	 wrpr	%g3, PSTATE_IE, %pstate
-
-copy_page_using_blkcommit:
-	membar	#LoadStore | #StoreStore | #StoreLoad
-	ldda	[%o1] ASI_BLK_P, %f0
-	add	%o1, 0x40, %o1
-	ldda	[%o1] ASI_BLK_P, %f16
-	add	%o1, 0x40, %o1
-	sethi	%hi(PAGE_SIZE), %o2
-1:	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
-	ldda	[%o1] ASI_BLK_P, %f32
-	stda	%f48, [%o0] ASI_BLK_COMMIT_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
-	ldda	[%o1] ASI_BLK_P, %f0
-	stda	%f48, [%o0] ASI_BLK_COMMIT_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
-	ldda	[%o1] ASI_BLK_P, %f16
-	stda	%f48, [%o0] ASI_BLK_COMMIT_P
-	sub	%o2, 0x40, %o2
-	add	%o1, 0x40, %o1
-	cmp	%o2, PAGE_SIZE_REM
-	bne,pt	%xcc, 1b
-	 add	%o0, 0x40, %o0
-#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
-	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
-	ldda	[%o1] ASI_BLK_P, %f32
-	stda	%f48, [%o0] ASI_BLK_COMMIT_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
-	ldda	[%o1] ASI_BLK_P, %f0
-	stda	%f48, [%o0] ASI_BLK_COMMIT_P
-	add	%o1, 0x40, %o1
-	sub	%o2, 0x40, %o2
-	add	%o0, 0x40, %o0
-	membar	#Sync
-	stda	%f32, [%o0] ASI_BLK_COMMIT_P
-	add	%o0, 0x40, %o0
-	ba,pt	%xcc, copy_user_page_continue
-	 stda	%f0, [%o0] ASI_BLK_COMMIT_P
-#else
-	membar	#Sync
-	stda	%f0, [%o0] ASI_BLK_COMMIT_P
-	add	%o0, 0x40, %o0
-	ba,pt	%xcc, copy_user_page_continue
-	 stda	%f16, [%o0] ASI_BLK_COMMIT_P
-#endif
-
-	.globl	cheetah_patch_pgcopyops
-cheetah_patch_pgcopyops:
-	sethi	%hi(FIX_INSN_1), %g1
-	or	%g1, %lo(FIX_INSN_1), %g1
-	sethi	%hi(cheetah_patch_1), %g2
-	or	%g2, %lo(cheetah_patch_1), %g2
-	stw	%g1, [%g2]
-	flush	%g2
-	retl
-	 nop
-
-#undef FIX_INSN_1
-#undef PAGE_SIZE_REM
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S
new file mode 100644
index 00000000000000..69af6a7f074236
--- /dev/null
+++ b/arch/sparc64/lib/copy_page.S
@@ -0,0 +1,239 @@
+/* clear_page.S: UltraSparc optimized copy page.
+ *
+ * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <asm/visasm.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+
+	/* What we used to do was lock a TLB entry into a specific
+	 * TLB slot, clear the page with interrupts disabled, then
+	 * restore the original TLB entry. This was great for
+	 * disturbing the TLB as little as possible, but it meant
+	 * we had to keep interrupts disabled for a long time.
+	 *
+	 * Now, we simply use the normal TLB loading mechanism,
+	 * and this makes the cpu choose a slot all by itself.
+	 * Then we do a normal TLB flush on exit. We need only
+	 * disable preemption during the clear.
+	 */
+
+#define TTE_BITS_TOP	(_PAGE_VALID | _PAGE_SZBITS)
+#define TTE_BITS_BOTTOM	(_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W)
+#define DCACHE_SIZE	(PAGE_SIZE * 2)
+
+#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
+#define PAGE_SIZE_REM	0x80
+#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+#define PAGE_SIZE_REM	0x100
+#else
+#error Wrong PAGE_SHIFT specified
+#endif
+
+#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
+	fmovd %reg0, %f48; fmovd %reg1, %f50; \
+	fmovd %reg2, %f52; fmovd %reg3, %f54; \
+	fmovd %reg4, %f56; fmovd %reg5, %f58; \
+	fmovd %reg6, %f60; fmovd %reg7, %f62;
+
+	.text
+
+	.align	32
+	.globl	copy_user_page
+copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
+	lduw	[%g6 + TI_PRE_COUNT], %o4
+	sethi	%uhi(PAGE_OFFSET), %g2
+	sethi	%hi(PAGE_SIZE), %o3
+
+	sllx	%g2, 32, %g2
+	sethi	%uhi(TTE_BITS_TOP), %g3
+
+	sllx	%g3, 32, %g3
+	sub	%o0, %g2, %g1	! dest paddr
+
+	sub	%o1, %g2, %g2	! src paddr
+	or	%g3, TTE_BITS_BOTTOM, %g3
+
+	and	%o2, %o3, %o0	! vaddr D-cache alias bit
+	or	%g1, %g3, %g1	! dest TTE data
+
+	or	%g2, %g3, %g2	! src TTE data
+	sethi	%hi(TLBTEMP_BASE), %o3
+
+	sethi	%hi(DCACHE_SIZE), %o1
+	add	%o0, %o3, %o0	! dest TTE vaddr
+
+	add	%o4, 1, %o2
+	add	%o0, %o1, %o1	! src TTE vaddr
+
+	/* Disable preemption. */
+	mov	TLB_TAG_ACCESS, %g3
+	stw	%o2, [%g6 + TI_PRE_COUNT]
+
+	/* Load TLB entries. */
+	rdpr	%pstate, %o2
+	wrpr	%o2, PSTATE_IE, %pstate
+	stxa	%o0, [%g3] ASI_DMMU
+	stxa	%g1, [%g0] ASI_DTLB_DATA_IN
+	membar	#Sync
+	stxa	%o1, [%g3] ASI_DMMU
+	stxa	%g2, [%g0] ASI_DTLB_DATA_IN
+	membar	#Sync
+	wrpr	%o2, 0x0, %pstate
+
+	BRANCH_IF_ANY_CHEETAH(g3,o2,1f)
+	ba,pt	%xcc, 9f
+	 nop
+
+1:
+	VISEntryHalf
+	membar	#StoreLoad | #StoreStore | #LoadStore
+	sethi	%hi((PAGE_SIZE/64)-2), %o2
+	mov	%o0, %g1
+	prefetch	[%o1 + 0x000], #one_read
+	or	%o2, %lo((PAGE_SIZE/64)-2), %o2
+	prefetch	[%o1 + 0x040], #one_read
+	prefetch	[%o1 + 0x080], #one_read
+	prefetch	[%o1 + 0x0c0], #one_read
+	ldd	[%o1 + 0x000], %f0
+	prefetch	[%o1 + 0x100], #one_read
+	ldd	[%o1 + 0x008], %f2
+	prefetch	[%o1 + 0x140], #one_read
+	ldd	[%o1 + 0x010], %f4
+	prefetch	[%o1 + 0x180], #one_read
+	fmovd	%f0, %f16
+	ldd	[%o1 + 0x018], %f6
+	fmovd	%f2, %f18
+	ldd	[%o1 + 0x020], %f8
+	fmovd	%f4, %f20
+	ldd	[%o1 + 0x028], %f10
+	fmovd	%f6, %f22
+	ldd	[%o1 + 0x030], %f12
+	fmovd	%f8, %f24
+	ldd	[%o1 + 0x038], %f14
+	fmovd	%f10, %f26
+	ldd	[%o1 + 0x040], %f0
+1:	ldd	[%o1 + 0x048], %f2
+	fmovd	%f12, %f28
+	ldd	[%o1 + 0x050], %f4
+	fmovd	%f14, %f30
+	stda	%f16, [%o0] ASI_BLK_P
+	ldd	[%o1 + 0x058], %f6
+	fmovd	%f0, %f16
+	ldd	[%o1 + 0x060], %f8
+	fmovd	%f2, %f18
+	ldd	[%o1 + 0x068], %f10
+	fmovd	%f4, %f20
+	ldd	[%o1 + 0x070], %f12
+	fmovd	%f6, %f22
+	ldd	[%o1 + 0x078], %f14
+	fmovd	%f8, %f24
+	ldd	[%o1 + 0x080], %f0
+	prefetch	[%o1 + 0x180], #one_read
+	fmovd	%f10, %f26
+	subcc	%o2, 1, %o2
+	add	%o0, 0x40, %o0
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+	ldd	[%o1 + 0x048], %f2
+	fmovd	%f12, %f28
+	ldd	[%o1 + 0x050], %f4
+	fmovd	%f14, %f30
+	stda	%f16, [%o0] ASI_BLK_P
+	ldd	[%o1 + 0x058], %f6
+	fmovd	%f0, %f16
+	ldd	[%o1 + 0x060], %f8
+	fmovd	%f2, %f18
+	ldd	[%o1 + 0x068], %f10
+	fmovd	%f4, %f20
+	ldd	[%o1 + 0x070], %f12
+	fmovd	%f6, %f22
+	add	%o0, 0x40, %o0
+	ldd	[%o1 + 0x078], %f14
+	fmovd	%f8, %f24
+	fmovd	%f10, %f26
+	fmovd	%f12, %f28
+	fmovd	%f14, %f30
+	stda	%f16, [%o0] ASI_BLK_P
+	membar	#Sync
+	VISExitHalf
+	ba,pt	%xcc, 5f
+	 nop
+
+9:
+	ldx	[%g6 + TI_FLAGS], %g3
+	andcc	%g3, _TIF_BLKCOMMIT, %g0
+	rd	%asi, %g3
+	be,a,pt	%icc, 1f
+	 wr	%g0, ASI_BLK_P, %asi
+	wr	%g0, ASI_BLK_COMMIT_P, %asi
+1:	VISEntry
+	mov	%o0, %g1
+	ldda	[%o1] ASI_BLK_P, %f0
+	add	%o1, 0x40, %o1
+	ldda	[%o1] ASI_BLK_P, %f16
+	add	%o1, 0x40, %o1
+	sethi	%hi(PAGE_SIZE), %o2
+1:	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+	ldda	[%o1] ASI_BLK_P, %f32
+	stda	%f48, [%o0] %asi
+	add	%o1, 0x40, %o1
+	sub	%o2, 0x40, %o2
+	add	%o0, 0x40, %o0
+	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+	ldda	[%o1] ASI_BLK_P, %f0
+	stda	%f48, [%o0] %asi
+	add	%o1, 0x40, %o1
+	sub	%o2, 0x40, %o2
+	add	%o0, 0x40, %o0
+	TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
+	ldda	[%o1] ASI_BLK_P, %f16
+	stda	%f48, [%o0] %asi
+	sub	%o2, 0x40, %o2
+	add	%o1, 0x40, %o1
+	cmp	%o2, PAGE_SIZE_REM
+	bne,pt	%xcc, 1b
+	 add	%o0, 0x40, %o0
+#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+	ldda	[%o1] ASI_BLK_P, %f32
+	stda	%f48, [%o0] %asi
+	add	%o1, 0x40, %o1
+	sub	%o2, 0x40, %o2
+	add	%o0, 0x40, %o0
+	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+	ldda	[%o1] ASI_BLK_P, %f0
+	stda	%f48, [%o0] %asi
+	add	%o1, 0x40, %o1
+	sub	%o2, 0x40, %o2
+	add	%o0, 0x40, %o0
+	membar	#Sync
+	stda	%f32, [%o0] %asi
+	add	%o0, 0x40, %o0
+	stda	%f0, [%o0] %asi
+#else
+	membar	#Sync
+	stda	%f0, [%o0] %asi
+	add	%o0, 0x40, %o0
+	stda	%f16, [%o0] %asi
+#endif
+	membar	#Sync
+	wr	%g3, 0x0, %asi
+	VISExit
+
+5:
+	stxa	%g0, [%g1] ASI_DMMU_DEMAP
+	membar	#Sync
+
+	sethi	%hi(DCACHE_SIZE), %g2
+	stxa	%g0, [%g1 + %g2] ASI_DMMU_DEMAP
+	membar	#Sync
+
+	retl
+	 stw	%o4, [%g6 + TI_PRE_COUNT]