/* checksum.S: Sparc V9 optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996, 2000 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

/* NOTE(review): the header names that followed these six #include
 * directives are missing from this copy of the file and must be restored
 * before it can be preprocessed.  The code below relies on the following
 * names that are not defined in this file: EFAULT, PAGE_OFFSET,
 * AOFF_task_thread, AOFF_thread_current_ds, csum_partial_copy_vis and
 * csum_partial_copy_user_vis.
 */
#include
#include
#include
#include
#include
#include

/* The "add with carry" instructions are a poor fit on Ultra for two
 * reasons: they cannot be paired with other instructions, and they only
 * add in the 32-bit carry condition bit.  The fast paths below therefore
 * accumulate into a 64-bit register and propagate the carry with an
 * explicit branch on %xcc followed by "add 1".  For larger chunks we use
 * VIS code, which is faster.
 */

/* Symbolic names for the argument registers (used as %src, %dst, ...). */
#define src o0
#define dst o1
#define len o2
#define sum o3

	.text

	/* CSUMCOPY_LASTCHUNK(off, t0, t1)
	 *
	 * Copy and checksum one 16-byte chunk ending at %src - off + 8,
	 * source read through %asi, destination written with plain stores.
	 *
	 *  - Loads two 64-bit values from [%src - off - 8] and [%src - off]
	 *    into t0 and t1.
	 *  - Adds each to %sum; when the 64-bit add carries (%xcc), 1 is
	 *    added back into %sum.
	 *  - Writes each value to the matching %dst offsets as two 32-bit
	 *    stores (low word first, then the high word in the branch delay
	 *    slot).  The entry code only checks that src and dst agree in
	 *    their low two address bits, so 64-bit stores are not used.
	 *
	 * t0 and t1 are clobbered.  Each expansion is exactly 16
	 * instructions; the computed jump in ccmerge depends on that size,
	 * so do not add or remove instructions here.
	 */
#define CSUMCOPY_LASTCHUNK(off, t0, t1)				\
	ldxa		[%src - off - 0x08] %asi, t0;		\
	ldxa		[%src - off - 0x00] %asi, t1;		\
	nop; nop;						\
	addcc		t0, %sum, %sum;				\
	stw		t0, [%dst - off - 0x04];		\
	srlx		t0, 32, t0;				\
	bcc,pt		%xcc, 51f;				\
	 stw		t0, [%dst - off - 0x08];		\
	add		%sum, 1, %sum;				\
51:	addcc		t1, %sum, %sum;				\
	stw		t1, [%dst - off + 0x04];		\
	srlx		t1, 32, t1;				\
	bcc,pt		%xcc, 52f;				\
	 stw		t1, [%dst - off - 0x00];		\
	add		%sum, 1, %sum;				\
52:

/* cpc_start .. cpc_end delimit the code named by the first __ex_table
 * entry at the bottom of this file.
 */
cpc_start:
/* cc_end_cruft: copy and checksum the final 0..15 bytes.
 *
 * On entry %g7 = len & 0xf.  An 8-byte piece (bit 3 of %g7) is added to
 * %sum directly.  The 4-, 2- and 1-byte pieces are placed in %g2 (shifted
 * left 32), %o4 (shifted left 16) and %o5 (shifted left 8), or cleared when
 * absent, then OR-ed together and added to %sum in one step.  Control then
 * continues at ccfold; %g4 is loaded with %uhi(PAGE_OFFSET) on the way.
 */
cc_end_cruft:
	andcc		%g7, 8, %g0		! IEU1 Group
	be,pn		%icc, 1f		! CTI
	 and		%g7, 4, %g5		! IEU0
	ldxa		[%src + 0x00] %asi, %g2	! Load Group
	add		%dst, 8, %dst		! IEU0
	add		%src, 8, %src		! IEU1
	addcc		%g2, %sum, %sum		! IEU1 Group + 2 bubbles
	stw		%g2, [%dst - 0x04]	! Store
	srlx		%g2, 32, %g2		! IEU0
	bcc,pt		%xcc, 1f		! CTI Group
	 stw		%g2, [%dst - 0x08]	! Store
	add		%sum, 1, %sum		! IEU0
1:	brz,pt		%g5, 1f			! CTI Group
	 clr		%g2			! IEU0
	lduwa		[%src + 0x00] %asi, %g2	! Load
	add		%dst, 4, %dst		! IEU0 Group
	add		%src, 4, %src		! IEU1
	stw		%g2, [%dst - 0x04]	! Store Group + 2 bubbles
	sllx		%g2, 32, %g2		! IEU0
1:	andcc		%g7, 2, %g0		! IEU1
	be,pn		%icc, 1f		! CTI Group
	 clr		%o4			! IEU1
	lduha		[%src + 0x00] %asi, %o4	! Load
	add		%src, 2, %src		! IEU0 Group
	add		%dst, 2, %dst		! IEU1
	sth		%o4, [%dst - 0x2]	! Store Group + 2 bubbles
	sll		%o4, 16, %o4		! IEU0
1:	andcc		%g7, 1, %g0		! IEU1
	be,pn		%icc, 1f		! CTI Group
	 clr		%o5			! IEU0
	lduba		[%src + 0x00] %asi, %o5	! Load
	stb		%o5, [%dst + 0x00]	! Store Group + 2 bubbles
	sll		%o5, 8, %o5		! IEU0
1:	or		%g2, %o4, %o4		! IEU1
	or		%o5, %o4, %o4		! IEU0 Group
	addcc		%o4, %sum, %sum		! IEU1
	bcc,pt		%xcc, ccfold		! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0 Group
	b,pt		%xcc, ccfold		! CTI
	 add		%sum, 1, %sum		! IEU1

/* cc_fixit: reached from the entry point when src is not 8-byte aligned
 * (src is already known to be even there).
 *
 * If len < 6 the alignment step is skipped and the tail code at ccte is
 * used with %g7 = len & 0xf.  Otherwise a halfword is consumed when
 * src & 2 is set, then a word when src & 4 is set, each copied to dst and
 * added into %sum, and control joins ccmerge with %g1 = len & 0xf0 and the
 * condition codes set from that andcc.
 *
 * %g4 is used as a scratch register here; ccfold rebuilds it before
 * returning.
 */
cc_fixit:
	cmp		%len, 6			! IEU1 Group
	bl,a,pn		%icc, ccte		! CTI
	 andcc		%len, 0xf, %g7		! IEU1 Group
	andcc		%src, 2, %g0		! IEU1 Group
	be,pn		%icc, 1f		! CTI
	 andcc		%src, 0x4, %g0		! IEU1 Group
	lduha		[%src + 0x00] %asi, %g4	! Load
	sub		%len, 2, %len		! IEU0
	add		%src, 2, %src		! IEU0 Group
	add		%dst, 2, %dst		! IEU1
	/* Add the halfword into bits 31..16 of %sum, then rebuild %sum from
	 * its low 16 bits and the upper part held in %g3 (incremented when
	 * the add carried).
	 */
	sll		%g4, 16, %g3		! IEU0 Group + 1 bubble
	addcc		%g3, %sum, %sum		! IEU1
	bcc,pt		%xcc, 0f		! CTI
	 srl		%sum, 16, %g3		! IEU0 Group
	add		%g3, 1, %g3		! IEU0 4 clocks (mispredict)
0:	andcc		%src, 0x4, %g0		! IEU1 Group
	sth		%g4, [%dst - 0x2]	! Store
	sll		%sum, 16, %sum		! IEU0
	sll		%g3, 16, %g3		! IEU0 Group
	srl		%sum, 16, %sum		! IEU0 Group
	or		%g3, %sum, %sum		! IEU0 Group (regdep)
1:	be,pt		%icc, ccmerge		! CTI
	 andcc		%len, 0xf0, %g1		! IEU1
	lduwa		[%src + 0x00] %asi, %g4	! Load Group
	sub		%len, 4, %len		! IEU0
	add		%src, 4, %src		! IEU1
	add		%dst, 4, %dst		! IEU0 Group
	addcc		%g4, %sum, %sum		! IEU1 Group + 1 bubble
	stw		%g4, [%dst - 0x4]	! Store
	bcc,pt		%xcc, ccmerge		! CTI
	 andcc		%len, 0xf0, %g1		! IEU1 Group
	b,pt		%xcc, ccmerge		! CTI 4 clocks (mispredict)
	 add		%sum, 1, %sum		! IEU0

/* csum_partial_copy_sparc64: copy len bytes from src to dst while adding
 * them into a checksum.  The source is read through %asi (ld*a); the
 * destination is written with ordinary stores.
 *
 * In:   %o0 = src, %o1 = dst, %o2 = len, %o3 = sum
 *       (len and sum are truncated to 32 bits on entry)
 * Out:  %o0 = 32-bit sum, folded from the 64-bit accumulator (fast path)
 *       or combined with the 16-bit partial sum (ccslow path)
 *
 * Dispatch:
 *  - src and dst differ in their low two address bits, or src is odd
 *                                  -> ccslow (byte-wise stores)
 *  - len >= 256                    -> csum_partial_copy_vis (not in this file)
 *  - src not 8-byte aligned        -> cc_fixit, which rejoins at ccmerge/ccte
 *  - otherwise                     -> ccmerge
 *
 * Scratch registers written in this file's paths: %g1-%g5, %g7, %o4, %o5.
 * %g4 is reloaded with %uhi(PAGE_OFFSET) << 32 before the fast path
 * returns.
 */
	.align		32
	.globl		csum_partial_copy_sparc64
csum_partial_copy_sparc64:			/* %o0=src, %o1=dest, %o2=len, %o3=sum */
	xorcc		%src, %dst, %o4		! IEU1 Group
	srl		%sum, 0, %sum		! IEU0
	andcc		%o4, 3, %g0		! IEU1 Group
	srl		%len, 0, %len		! IEU0
	bne,pn		%icc, ccslow		! CTI
	 andcc		%src, 1, %g0		! IEU1 Group
	bne,pn		%icc, ccslow		! CTI
	 cmp		%len, 256		! IEU1 Group
	bgeu,pt		%icc, csum_partial_copy_vis ! CTI
	 andcc		%src, 7, %g0		! IEU1 Group
	bne,pn		%icc, cc_fixit		! CTI
	 andcc		%len, 0xf0, %g1		! IEU1 Group
	/* ccmerge: %g1 = len & 0xf0 is the number of bytes handled by the
	 * unrolled table.  If it is zero go straight to the tail.  Otherwise
	 * advance src/dst by %g1 and jump to (12f - 4 * %g1): every 16 data
	 * bytes correspond to one 16-instruction (64-byte) macro expansion,
	 * so exactly %g1 / 16 chunks execute before label 12.
	 */
ccmerge:be,pn		%icc, ccte		! CTI
	 andcc		%len, 0xf, %g7		! IEU1 Group
	sll		%g1, 2, %o4		! IEU0
13:	sethi		%hi(12f), %o5		! IEU0 Group
	add		%src, %g1, %src		! IEU1
	sub		%o5, %o4, %o5		! IEU0 Group
	jmpl		%o5 + %lo(12f), %g0	! CTI Group brk forced
	 add		%dst, %g1, %dst		! IEU0 Group
	/* Fifteen chunks, addressed backwards from the advanced src/dst. */
cctbl:	CSUMCOPY_LASTCHUNK(0xe8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xd8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xc8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xb8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xa8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x98,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x88,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x78,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x68,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x58,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x48,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x38,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x28,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x18,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x08,%g2,%g3)
12:
	andcc		%len, 0xf, %g7		! IEU1 Group
	/* ccte: condition codes reflect len & 0xf; non-zero means there are
	 * trailing bytes for cc_end_cruft.
	 */
ccte:	bne,pn		%icc, cc_end_cruft	! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0
	/* ccfold: add the two 32-bit halves of %sum together (the add of
	 * sum and sum << 32 leaves that total in the upper word), add 1 more
	 * when that addition carried, and return the result in %o0.  The
	 * delay slot of retl finishes rebuilding %g4.
	 */
ccfold:	sllx		%sum, 32, %o0		! IEU0 Group
	addcc		%sum, %o0, %o0		! IEU1 Group (regdep)
	srlx		%o0, 32, %o0		! IEU0 Group (regdep)
	bcs,a,pn	%xcc, 1f		! CTI
	 add		%o0, 1, %o0		! IEU1 4 clocks (mispredict)
1:	retl					! CTI Group brk forced
	 sllx		%g4, 32, %g4		! IEU0 Group

/* ccslow: path taken when src and dst do not agree in their low two
 * address bits, or src is odd.  The source is still read with the widest
 * load its alignment allows, but every store to dst is a single byte.
 *
 *   %g5 = running partial sum (starts at 0; not seeded from %sum)
 *   %o5 = src & 1 at entry; when non-zero the two bytes of the final
 *         16-bit result are swapped before it is added to %sum
 *   %g7 = remaining count, first in halfwords and then in words
 *
 * A len <= 0 skips straight to label 4 with %g5 = 0.
 */
ccslow:	mov		0, %g5
	brlez,pn	%len, 4f
	 andcc		%src, 1, %o5
	be,a,pt		%icc, 1f
	 srl		%len, 1, %g7
	/* Odd src: consume one leading byte. */
	sub		%len, 1, %len
	lduba		[%src] %asi, %g5
	add		%src, 1, %src
	stb		%g5, [%dst]
	srl		%len, 1, %g7
	add		%dst, 1, %dst
1:	brz,a,pn	%g7, 3f
	 andcc		%len, 1, %g0
	andcc		%src, 2, %g0
	be,a,pt		%icc, 1f
	 srl		%g7, 1, %g7
	/* src & 2 set: consume one halfword to reach word alignment. */
	lduha		[%src] %asi, %o4
	sub		%len, 2, %len
	srl		%o4, 8, %g2
	sub		%g7, 1, %g7
	stb		%g2, [%dst]
	add		%o4, %g5, %g5
	stb		%o4, [%dst + 1]
	add		%src, 2, %src
	srl		%g7, 1, %g7
	add		%dst, 2, %dst
1:	brz,a,pn	%g7, 2f
	 andcc		%len, 2, %g0
	lduwa		[%src] %asi, %o4
	/* Word loop: store the four bytes most-significant first, add the
	 * word into %g5 with end-around carry.  The next word is loaded in
	 * the annulled delay slot of the loop branch.
	 */
5:	srl		%o4, 24, %g2
	srl		%o4, 16, %g3
	stb		%g2, [%dst]
	srl		%o4, 8, %g2
	stb		%g3, [%dst + 1]
	add		%src, 4, %src
	stb		%g2, [%dst + 2]
	addcc		%o4, %g5, %g5
	stb		%o4, [%dst + 3]
	addc		%g5, %g0, %g5
	add		%dst, 4, %dst
	subcc		%g7, 1, %g7
	bne,a,pt	%icc, 5b
	 lduwa		[%src] %asi, %o4
	/* Reduce %g5 to (low 16 bits) + (bits 31..16). */
	sll		%g5, 16, %g2
	srl		%g5, 16, %g5
	srl		%g2, 16, %g2
	andcc		%len, 2, %g0
	add		%g2, %g5, %g5
	/* Trailing halfword, if len & 2. */
2:	be,a,pt		%icc, 3f
	 andcc		%len, 1, %g0
	lduha		[%src] %asi, %o4
	andcc		%len, 1, %g0
	srl		%o4, 8, %g2
	add		%src, 2, %src
	stb		%g2, [%dst]
	add		%g5, %o4, %g5
	stb		%o4, [%dst + 1]
	add		%dst, 2, %dst
	/* Trailing byte, if len & 1; it is added shifted left by 8. */
3:	be,a,pt		%icc, 1f
	 sll		%g5, 16, %o4
	lduba		[%src] %asi, %g2
	sll		%g2, 8, %o4
	stb		%g2, [%dst]
	add		%g5, %o4, %g5
	sll		%g5, 16, %o4
	/* Fold %g5 down to 16 bits with end-around carry. */
1:	addcc		%o4, %g5, %g5
	srl		%g5, 16, %o4
	addc		%g0, %o4, %g5
	brz,pt		%o5, 4f
	 srl		%g5, 8, %o4
	/* src was odd at entry: swap the two bytes of the result. */
	and		%g5, 0xff, %g2
	and		%o4, 0xff, %o4
	sll		%g2, 8, %g2
	or		%g2, %o4, %g5
	/* Combine with the caller's sum and return it as a 32-bit value. */
4:	addcc		%sum, %g5, %sum
	addc		%g0, %sum, %o0
	retl
	 srl		%o0, 0, %o0
cpc_end:

	/* Now the version with userspace as the destination */

	/* CSUMCOPY_LASTCHUNK_USER(off, t0, t1)
	 *
	 * Same operation and same instruction count (16) as
	 * CSUMCOPY_LASTCHUNK, with the address-space roles reversed: the
	 * source is read with plain ldx and the destination is written
	 * through %asi with stwa.  t0 and t1 are clobbered.
	 */
#define CSUMCOPY_LASTCHUNK_USER(off, t0, t1)			\
	ldx		[%src - off - 0x08], t0;		\
	ldx		[%src - off - 0x00], t1;		\
	nop; nop;						\
	addcc		t0, %sum, %sum;				\
	stwa		t0, [%dst - off - 0x04] %asi;		\
	srlx		t0, 32, t0;				\
	bcc,pt		%xcc, 51f;				\
	 stwa		t0, [%dst - off - 0x08] %asi;		\
	add		%sum, 1, %sum;				\
51:	addcc		t1, %sum, %sum;				\
	stwa		t1, [%dst - off + 0x04] %asi;		\
	srlx		t1, 32, t1;				\
	bcc,pt		%xcc, 52f;				\
	 stwa		t1, [%dst - off - 0x00] %asi;		\
	add		%sum, 1, %sum;				\
52:

/* cpc_user_start .. cpc_user_end delimit the code named by the second
 * __ex_table entry at the bottom of this file.
 */
cpc_user_start:
/* cc_user_end_cruft: copy and checksum the final 0..15 bytes, storing
 * through %asi.  Register usage and structure mirror cc_end_cruft:
 * %g7 = len & 0xf on entry, pieces are gathered in %g2/%o4/%o5, and
 * control continues at ccuserfold.
 */
cc_user_end_cruft:
	andcc		%g7, 8, %g0		! IEU1 Group
	be,pn		%icc, 1f		! CTI
	 and		%g7, 4, %g5		! IEU0
	ldx		[%src + 0x00], %g2	! Load Group
	add		%dst, 8, %dst		! IEU0
	add		%src, 8, %src		! IEU1
	addcc		%g2, %sum, %sum		! IEU1 Group + 2 bubbles
	stwa		%g2, [%dst - 0x04] %asi	! Store
	srlx		%g2, 32, %g2		! IEU0
	bcc,pt		%xcc, 1f		! CTI Group
	 stwa		%g2, [%dst - 0x08] %asi	! Store
	add		%sum, 1, %sum		! IEU0
1:	brz,pt		%g5, 1f			! CTI Group
	 clr		%g2			! IEU0
	lduw		[%src + 0x00], %g2	! Load
	add		%dst, 4, %dst		! IEU0 Group
	add		%src, 4, %src		! IEU1
	stwa		%g2, [%dst - 0x04] %asi	! Store Group + 2 bubbles
	sllx		%g2, 32, %g2		! IEU0
1:	andcc		%g7, 2, %g0		! IEU1
	be,pn		%icc, 1f		! CTI Group
	 clr		%o4			! IEU1
	lduh		[%src + 0x00], %o4	! Load
	add		%src, 2, %src		! IEU0 Group
	add		%dst, 2, %dst		! IEU1
	stha		%o4, [%dst - 0x2] %asi	! Store Group + 2 bubbles
	sll		%o4, 16, %o4		! IEU0
1:	andcc		%g7, 1, %g0		! IEU1
	be,pn		%icc, 1f		! CTI Group
	 clr		%o5			! IEU0
	ldub		[%src + 0x00], %o5	! Load
	stba		%o5, [%dst + 0x00] %asi	! Store Group + 2 bubbles
	sll		%o5, 8, %o5		! IEU0
1:	or		%g2, %o4, %o4		! IEU1
	or		%o5, %o4, %o4		! IEU0 Group
	addcc		%o4, %sum, %sum		! IEU1
	bcc,pt		%xcc, ccuserfold	! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0 Group
	b,pt		%xcc, ccuserfold	! CTI
	 add		%sum, 1, %sum		! IEU1

/* cc_user_fixit: alignment fix-up for the user-destination variant.
 * Same structure as cc_fixit: len < 6 goes to ccuserte; otherwise a
 * halfword and/or a word is consumed to bring src to 8-byte alignment and
 * control joins ccusermerge with %g1 = len & 0xf0.
 */
cc_user_fixit:
	cmp		%len, 6			! IEU1 Group
	bl,a,pn		%icc, ccuserte		! CTI
	 andcc		%len, 0xf, %g7		! IEU1 Group
	andcc		%src, 2, %g0		! IEU1 Group
	be,pn		%icc, 1f		! CTI
	 andcc		%src, 0x4, %g0		! IEU1 Group
	lduh		[%src + 0x00], %g4	! Load
	sub		%len, 2, %len		! IEU0
	add		%src, 2, %src		! IEU0 Group
	add		%dst, 2, %dst		! IEU1
	sll		%g4, 16, %g3		! IEU0 Group + 1 bubble
	addcc		%g3, %sum, %sum		! IEU1
	bcc,pt		%xcc, 0f		! CTI
	 srl		%sum, 16, %g3		! IEU0 Group
	add		%g3, 1, %g3		! IEU0 4 clocks (mispredict)
0:	andcc		%src, 0x4, %g0		! IEU1 Group
	stha		%g4, [%dst - 0x2] %asi	! Store
	sll		%sum, 16, %sum		! IEU0
	sll		%g3, 16, %g3		! IEU0 Group
	srl		%sum, 16, %sum		! IEU0 Group
	or		%g3, %sum, %sum		! IEU0 Group (regdep)
1:	be,pt		%icc, ccusermerge	! CTI
	 andcc		%len, 0xf0, %g1		! IEU1
	lduw		[%src + 0x00], %g4	! Load Group
	sub		%len, 4, %len		! IEU0
	add		%src, 4, %src		! IEU1
	add		%dst, 4, %dst		! IEU0 Group
	addcc		%g4, %sum, %sum		! IEU1 Group + 1 bubble
	stwa		%g4, [%dst - 0x4] %asi	! Store
	bcc,pt		%xcc, ccusermerge	! CTI
	 andcc		%len, 0xf0, %g1		! IEU1 Group
	b,pt		%xcc, ccusermerge	! CTI 4 clocks (mispredict)
	 add		%sum, 1, %sum		! IEU0

/* csum_partial_copy_user_sparc64: same contract and dispatch as
 * csum_partial_copy_sparc64, except that the source is read with plain
 * loads and the destination is written through %asi (st*a).
 *
 * In:   %o0 = src, %o1 = dst, %o2 = len, %o3 = sum
 * Out:  %o0 = 32-bit sum
 *
 * len >= 256 is handed to csum_partial_copy_user_vis (not in this file).
 */
	.align		32
	.globl		csum_partial_copy_user_sparc64
csum_partial_copy_user_sparc64:			/* %o0=src, %o1=dest, %o2=len, %o3=sum */
	xorcc		%src, %dst, %o4		! IEU1 Group
	srl		%sum, 0, %sum		! IEU0
	andcc		%o4, 3, %g0		! IEU1 Group
	srl		%len, 0, %len		! IEU0
	bne,pn		%icc, ccuserslow	! CTI
	 andcc		%src, 1, %g0		! IEU1 Group
	bne,pn		%icc, ccuserslow	! CTI
	 cmp		%len, 256		! IEU1 Group
	bgeu,pt		%icc, csum_partial_copy_user_vis ! CTI
	 andcc		%src, 7, %g0		! IEU1 Group
	bne,pn		%icc, cc_user_fixit	! CTI
	 andcc		%len, 0xf0, %g1		! IEU1 Group
	/* Computed jump into ccusertbl, identical in form to ccmerge:
	 * target = 12f - 4 * (len & 0xf0).
	 */
ccusermerge:
	be,pn		%icc, ccuserte		! CTI
	 andcc		%len, 0xf, %g7		! IEU1 Group
	sll		%g1, 2, %o4		! IEU0
13:	sethi		%hi(12f), %o5		! IEU0 Group
	add		%src, %g1, %src		! IEU1
	sub		%o5, %o4, %o5		! IEU0 Group
	jmpl		%o5 + %lo(12f), %g0	! CTI Group brk forced
	 add		%dst, %g1, %dst		! IEU0 Group
ccusertbl:
	CSUMCOPY_LASTCHUNK_USER(0xe8,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0xd8,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0xc8,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0xb8,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0xa8,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x98,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x88,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x78,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x68,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x58,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x48,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x38,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x28,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x18,%g2,%g3)
	CSUMCOPY_LASTCHUNK_USER(0x08,%g2,%g3)
12:
	andcc		%len, 0xf, %g7		! IEU1 Group
ccuserte:
	bne,pn		%icc, cc_user_end_cruft	! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0
	/* Fold the 64-bit accumulator to 32 bits and return, as in ccfold. */
ccuserfold:
	sllx		%sum, 32, %o0		! IEU0 Group
	addcc		%sum, %o0, %o0		! IEU1 Group (regdep)
	srlx		%o0, 32, %o0		! IEU0 Group (regdep)
	bcs,a,pn	%xcc, 1f		! CTI
	 add		%o0, 1, %o0		! IEU1 4 clocks (mispredict)
1:	retl					! CTI Group brk forced
	 sllx		%g4, 32, %g4		! IEU0 Group

/* ccuserslow: byte-wise-store path for the user-destination variant.
 * Instruction-for-instruction the same as ccslow, with plain loads from
 * src and stba ... %asi stores to dst.  %g5 is the partial sum, %o5
 * records src & 1 at entry, %g7 is the remaining halfword/word count.
 */
ccuserslow:
	mov		0, %g5
	brlez,pn	%len, 4f
	 andcc		%src, 1, %o5
	be,a,pt		%icc, 1f
	 srl		%len, 1, %g7
	/* Odd src: consume one leading byte. */
	sub		%len, 1, %len
	ldub		[%src], %g5
	add		%src, 1, %src
	stba		%g5, [%dst] %asi
	srl		%len, 1, %g7
	add		%dst, 1, %dst
1:	brz,a,pn	%g7, 3f
	 andcc		%len, 1, %g0
	andcc		%src, 2, %g0
	be,a,pt		%icc, 1f
	 srl		%g7, 1, %g7
	/* src & 2 set: consume one halfword to reach word alignment. */
	lduh		[%src], %o4
	sub		%len, 2, %len
	srl		%o4, 8, %g2
	sub		%g7, 1, %g7
	stba		%g2, [%dst] %asi
	add		%o4, %g5, %g5
	stba		%o4, [%dst + 1] %asi
	add		%src, 2, %src
	srl		%g7, 1, %g7
	add		%dst, 2, %dst
1:	brz,a,pn	%g7, 2f
	 andcc		%len, 2, %g0
	lduw		[%src], %o4
	/* Word loop: four byte stores per word, end-around-carry add. */
5:	srl		%o4, 24, %g2
	srl		%o4, 16, %g3
	stba		%g2, [%dst] %asi
	srl		%o4, 8, %g2
	stba		%g3, [%dst + 1] %asi
	add		%src, 4, %src
	stba		%g2, [%dst + 2] %asi
	addcc		%o4, %g5, %g5
	stba		%o4, [%dst + 3] %asi
	addc		%g5, %g0, %g5
	add		%dst, 4, %dst
	subcc		%g7, 1, %g7
	bne,a,pt	%icc, 5b
	 lduw		[%src], %o4
	/* Reduce %g5 to (low 16 bits) + (bits 31..16). */
	sll		%g5, 16, %g2
	srl		%g5, 16, %g5
	srl		%g2, 16, %g2
	andcc		%len, 2, %g0
	add		%g2, %g5, %g5
	/* Trailing halfword, if len & 2. */
2:	be,a,pt		%icc, 3f
	 andcc		%len, 1, %g0
	lduh		[%src], %o4
	andcc		%len, 1, %g0
	srl		%o4, 8, %g2
	add		%src, 2, %src
	stba		%g2, [%dst] %asi
	add		%g5, %o4, %g5
	stba		%o4, [%dst + 1] %asi
	add		%dst, 2, %dst
	/* Trailing byte, if len & 1; it is added shifted left by 8. */
3:	be,a,pt		%icc, 1f
	 sll		%g5, 16, %o4
	ldub		[%src], %g2
	sll		%g2, 8, %o4
	stba		%g2, [%dst] %asi
	add		%g5, %o4, %g5
	sll		%g5, 16, %o4
	/* Fold %g5 down to 16 bits with end-around carry. */
1:	addcc		%o4, %g5, %g5
	srl		%g5, 16, %o4
	addc		%g0, %o4, %g5
	brz,pt		%o5, 4f
	 srl		%g5, 8, %o4
	/* src was odd at entry: swap the two bytes of the result. */
	and		%g5, 0xff, %g2
	and		%o4, 0xff, %o4
	sll		%g2, 8, %g2
	or		%g2, %o4, %g5
	/* Combine with the caller's sum and return it as a 32-bit value. */
4:	addcc		%sum, %g5, %sum
	addc		%g0, %sum, %o0
	retl
	 srl		%o0, 0, %o0
cpc_user_end:

/* cpc_handler: handler named by both __ex_table entries below.
 *
 *  - Loads a 64-bit value from [%sp + 0x7ff + 128]; when it is non-zero
 *    it is used as a pointer and the 32-bit value -EFAULT is stored
 *    through it.
 *    NOTE(review): presumably this is an error pointer the caller placed
 *    in its stack frame - confirm against the C callers.
 *  - Reloads %asi from the byte at
 *    %g6 + AOFF_task_thread + AOFF_thread_current_ds.
 *  - Rebuilds %g4 = %uhi(PAGE_OFFSET) << 32 and returns with retl.
 *
 * %o0 is not written here.
 */
	.globl		cpc_handler
cpc_handler:
	ldx		[%sp + 0x7ff + 128], %g1
	ldub		[%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g3
	sub		%g0, EFAULT, %g2
	brnz,a,pt	%g1, 1f
	 st		%g2, [%g1]
1:	sethi		%uhi(PAGE_OFFSET), %g4
	wr		%g3, %g0, %asi
	retl
	 sllx		%g4, 32, %g4

/* Exception table entries.  Each .word line names a start label, a zero
 * word, an end label and the handler.
 * NOTE(review): presumably this is the range form of the kernel exception
 * table, routing faults inside [start, end) to cpc_handler - confirm
 * against the fault-handling code.
 */
	.section	__ex_table
	.align		4
	.word		cpc_start, 0, cpc_end, cpc_handler
	.word		cpc_user_start, 0, cpc_user_end, cpc_handler