#ifndef _I386_STRING_H_ #define _I386_STRING_H_ #ifdef __KERNEL__ #include /* * On a 486 or Pentium, we are better off not using the * byte string operations. But on a 386 or a PPro the * byte string ops are faster than doing it by hand * (MUCH faster on a Pentium). * * Also, the byte strings actually work correctly. Forget * the i486 routines for now as they may be broken.. */ #if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486) #include #else /* * This string-include defines all string functions as inline * functions. Use gcc. It also assumes ds=es=data space, this should be * normal. Most of the string-functions are rather heavily hand-optimized, * see especially strtok,strstr,str[c]spn. They should work, but are not * very easy to understand. Everything is done entirely within the register * set, making the functions fast and clean. String instructions have been * used through-out, making for "slightly" unclear code :-) * * NO Copyright (C) 1991, 1992 Linus Torvalds, * consider these trivial functions to be PD. */ #define __HAVE_ARCH_STRCPY static inline char * strcpy(char * dest,const char *src) { int d0, d1, d2; __asm__ __volatile__( "1:\tlodsb\n\t" "stosb\n\t" "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2) :"0" (src),"1" (dest) : "memory"); return dest; } #define __HAVE_ARCH_STRNCPY static inline char * strncpy(char * dest,const char *src,size_t count) { int d0, d1, d2, d3; __asm__ __volatile__( "1:\tdecl %2\n\t" "js 2f\n\t" "lodsb\n\t" "stosb\n\t" "testb %%al,%%al\n\t" "jne 1b\n\t" "rep\n\t" "stosb\n" "2:" : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) :"0" (src),"1" (dest),"2" (count) : "memory"); return dest; } #define __HAVE_ARCH_STRCAT static inline char * strcat(char * dest,const char * src) { int d0, d1, d2, d3; __asm__ __volatile__( "repne\n\t" "scasb\n\t" "decl %1\n" "1:\tlodsb\n\t" "stosb\n\t" "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory"); return dest; } #define __HAVE_ARCH_STRNCAT static inline char * strncat(char * dest,const char * src,size_t count) { int d0, d1, d2, d3; __asm__ __volatile__( "repne\n\t" "scasb\n\t" "decl %1\n\t" "movl %8,%3\n" "1:\tdecl %3\n\t" "js 2f\n\t" "lodsb\n\t" "stosb\n\t" "testb %%al,%%al\n\t" "jne 1b\n" "2:\txorl %2,%2\n\t" "stosb" : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) : "memory"); return dest; } #define __HAVE_ARCH_STRCMP static inline int strcmp(const char * cs,const char * ct) { int d0, d1; register int __res; __asm__ __volatile__( "1:\tlodsb\n\t" "scasb\n\t" "jne 2f\n\t" "testb %%al,%%al\n\t" "jne 1b\n\t" "xorl %%eax,%%eax\n\t" "jmp 3f\n" "2:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "3:" :"=a" (__res), "=&S" (d0), "=&D" (d1) :"1" (cs),"2" (ct)); return __res; } #define __HAVE_ARCH_STRNCMP static inline int strncmp(const char * cs,const char * ct,size_t count) { register int __res; int d0, d1, d2; __asm__ __volatile__( "1:\tdecl %3\n\t" "js 2f\n\t" "lodsb\n\t" "scasb\n\t" "jne 3f\n\t" "testb %%al,%%al\n\t" "jne 1b\n" "2:\txorl %%eax,%%eax\n\t" "jmp 4f\n" "3:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "4:" :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) :"1" (cs),"2" (ct),"3" (count)); return __res; } #define __HAVE_ARCH_STRCHR static inline char * strchr(const char * s, int c) { int d0; register char * __res; __asm__ __volatile__( "movb %%al,%%ah\n" "1:\tlodsb\n\t" "cmpb %%ah,%%al\n\t" "je 2f\n\t" "testb %%al,%%al\n\t" "jne 1b\n\t" "movl $1,%1\n" "2:\tmovl %1,%0\n\t" "decl %0" :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); return __res; } #define __HAVE_ARCH_STRRCHR static inline char * strrchr(const char * s, int c) { int d0, d1; register char * __res; __asm__ __volatile__( "movb %%al,%%ah\n" "1:\tlodsb\n\t" "cmpb %%ah,%%al\n\t" "jne 2f\n\t" "leal -1(%%esi),%0\n" "2:\ttestb %%al,%%al\n\t" "jne 1b" :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); return __res; } #define __HAVE_ARCH_STRLEN static inline size_t strlen(const char * s) { int d0; register int __res; __asm__ __volatile__( "repne\n\t" "scasb\n\t" "notl %0\n\t" "decl %0" :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); return __res; } static inline void * __memcpy(void * to, const void * from, size_t n) { int d0, d1, d2; __asm__ __volatile__( "rep ; movsl\n\t" "testb $2,%b4\n\t" "je 1f\n\t" "movsw\n" "1:\ttestb $1,%b4\n\t" "je 2f\n\t" "movsb\n" "2:" : "=&c" (d0), "=&D" (d1), "=&S" (d2) :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) : "memory"); return (to); } /* * This looks horribly ugly, but the compiler can optimize it totally, * as the count is constant. */ static inline void * __constant_memcpy(void * to, const void * from, size_t n) { switch (n) { case 0: return to; case 1: *(unsigned char *)to = *(const unsigned char *)from; return to; case 2: *(unsigned short *)to = *(const unsigned short *)from; return to; case 3: *(unsigned short *)to = *(const unsigned short *)from; *(2+(unsigned char *)to) = *(2+(const unsigned char *)from); return to; case 4: *(unsigned long *)to = *(const unsigned long *)from; return to; case 6: /* for Ethernet addresses */ *(unsigned long *)to = *(const unsigned long *)from; *(2+(unsigned short *)to) = *(2+(const unsigned short *)from); return to; case 8: *(unsigned long *)to = *(const unsigned long *)from; *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); return to; case 12: *(unsigned long *)to = *(const unsigned long *)from; *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); return to; case 16: *(unsigned long *)to = *(const unsigned long *)from; *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); return to; case 20: *(unsigned long *)to = *(const unsigned long *)from; *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); *(4+(unsigned long *)to) = *(4+(const unsigned long *)from); return to; } #define COMMON(x) \ __asm__ __volatile__( \ "rep ; movsl" \ x \ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ : "0" (n/4),"1" ((long) to),"2" ((long) from) \ : "memory"); { int d0, d1, d2; switch (n % 4) { case 0: COMMON(""); return to; case 1: COMMON("\n\tmovsb"); return to; case 2: COMMON("\n\tmovsw"); return to; default: COMMON("\n\tmovsw\n\tmovsb"); return to; } } #undef COMMON } #define __HAVE_ARCH_MEMCPY #ifdef CONFIG_X86_USE_3DNOW #include /* * This CPU favours 3DNow strongly (eg AMD Athlon) */ static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) { if (len < 512) return __constant_memcpy(to, from, len); return _mmx_memcpy(to, from, len); } static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __memcpy(to, from, len); return _mmx_memcpy(to, from, len); } #define memcpy(t, f, n) \ (__builtin_constant_p(n) ? \ __constant_memcpy3d((t),(f),(n)) : \ __memcpy3d((t),(f),(n))) #else /* * No 3D Now! */ #define memcpy(t, f, n) \ (__builtin_constant_p(n) ? \ __constant_memcpy((t),(f),(n)) : \ __memcpy((t),(f),(n))) #endif /* * struct_cpy(x,y), copy structure *x into (matching structure) *y. * * We get link-time errors if the structure sizes do not match. * There is no runtime overhead, it's all optimized away at * compile time. */ extern void __struct_cpy_bug (void); #define struct_cpy(x,y) \ ({ \ if (sizeof(*(x)) != sizeof(*(y))) \ __struct_cpy_bug; \ memcpy(x, y, sizeof(*(x))); \ }) #define __HAVE_ARCH_MEMMOVE static inline void * memmove(void * dest,const void * src, size_t n) { int d0, d1, d2; if (dest