diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/i386_ksyms.c x/arch/i386/kernel/i386_ksyms.c
--- x-ref/arch/i386/kernel/i386_ksyms.c	2003-06-13 22:07:23.000000000 +0200
+++ x/arch/i386/kernel/i386_ksyms.c	2003-07-17 06:19:34.000000000 +0200
@@ -107,6 +107,12 @@ EXPORT_SYMBOL(__generic_copy_from_user);
 EXPORT_SYMBOL(__generic_copy_to_user);
 EXPORT_SYMBOL(strnlen_user);
 
+#ifdef CONFIG_X86_INTEL_USERCOPY
+EXPORT_SYMBOL(movsl_mask);
+EXPORT_SYMBOL(__copy_user_int);
+EXPORT_SYMBOL(__copy_user_zeroing_int);
+#endif
+
 EXPORT_SYMBOL(pci_alloc_consistent);
 EXPORT_SYMBOL(pci_free_consistent);
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/setup.c x/arch/i386/kernel/setup.c
--- x-ref/arch/i386/kernel/setup.c	2003-07-17 06:16:35.000000000 +0200
+++ x/arch/i386/kernel/setup.c	2003-07-17 06:19:34.000000000 +0200
@@ -156,6 +156,13 @@ unsigned long pci_mem_start = 0x10000000
 static unsigned int highmem_pages __initdata = -1;
 
 /*
+ * Alignment at which movsl is preferred for bulk memory copies
+ */
+#ifdef CONFIG_X86_INTEL_USERCOPY
+struct movsl_mask movsl_mask;
+#endif
+
+/*
  * Setup options
  */
 struct drive_info_struct { char dummy[32]; } drive_info;
@@ -2451,6 +2458,23 @@ static void __init init_intel(struct cpu
 	}
 #endif
 
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	/*
+	 * Set up the preferred alignment for movsl bulk memory moves
+	 */
+	switch (c->x86) {
+	case 4:		/* 486: untested */
+		break;
+	case 5:		/* Old Pentia: untested */
+		break;
+	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	case 7:		/* P4 is OK down to 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	}
+#endif
 }
 
 void __init get_cpu_vendor(struct cpuinfo_x86 *c)
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/lib/usercopy.c x/arch/i386/lib/usercopy.c
--- x-ref/arch/i386/lib/usercopy.c	2003-06-13 22:07:23.000000000 +0200
+++ x/arch/i386/lib/usercopy.c	2003-07-17 06:19:34.000000000 +0200
@@ -45,8 +45,12 @@ unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
 {
 	prefetch(from);
-	if (access_ok(VERIFY_WRITE, to, n))
-		__copy_user(to,from,n);
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		if (movsl_is_ok(to, from, n))
+			__copy_user(to, from, n);
+		else
+			n = __copy_user_int(to, from, n);
+	}
 	return n;
 }
 
@@ -54,10 +58,14 @@ unsigned long
 __generic_copy_from_user(void *to, const void *from, unsigned long n)
 {
 	prefetchw(to);
-	if (access_ok(VERIFY_READ, from, n))
-		__copy_user_zeroing(to,from,n);
-	else
-		memset(to, 0, n);
+	if (access_ok(VERIFY_READ, from, n)) {
+		if (movsl_is_ok(to, from, n))
+			__copy_user_zeroing(to,from,n);
+		else
+			n = __copy_user_zeroing_int(to, from, n);
+	} else {
+		memset(to, 0, n);
+	}
 	return n;
 }
 
@@ -252,3 +260,194 @@ long strnlen_user(const char *s, long n)
 	:"cc");
 	return res & mask;
 }
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy.  */
+unsigned long __copy_user_int(void *to, const void *from, unsigned long size)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		" .align 2,0x90\n"
+		"0: movl 32(%4), %%eax\n"
+		" cmpl $67, %0\n"
+		" jbe 1f\n"
+		" movl 64(%4), %%eax\n"
+		" .align 2,0x90\n"
+		"1: movl 0(%4), %%eax\n"
+		" movl 4(%4), %%edx\n"
+		"2: movl %%eax, 0(%3)\n"
+		"21: movl %%edx, 4(%3)\n"
+		" movl 8(%4), %%eax\n"
+		" movl 12(%4),%%edx\n"
+		"3: movl %%eax, 8(%3)\n"
+		"31: movl %%edx, 12(%3)\n"
+		" movl 16(%4), %%eax\n"
+		" movl 20(%4), %%edx\n"
+		"4: movl %%eax, 16(%3)\n"
+		"41: movl %%edx, 20(%3)\n"
+		" movl 24(%4), %%eax\n"
+		" movl 28(%4), %%edx\n"
+		"10: movl %%eax, 24(%3)\n"
+		"51: movl %%edx, 28(%3)\n"
+		" movl 32(%4), %%eax\n"
+		" movl 36(%4), %%edx\n"
+		"11: movl %%eax, 32(%3)\n"
+		"61: movl %%edx, 36(%3)\n"
+		" movl 40(%4), %%eax\n"
+		" movl 44(%4), %%edx\n"
+		"12: movl %%eax, 40(%3)\n"
+		"71: movl %%edx, 44(%3)\n"
+		" movl 48(%4), %%eax\n"
+		" movl 52(%4), %%edx\n"
+		"13: movl %%eax, 48(%3)\n"
+		"81: movl %%edx, 52(%3)\n"
+		" movl 56(%4), %%eax\n"
+		" movl 60(%4), %%edx\n"
+		"14: movl %%eax, 56(%3)\n"
+		"91: movl %%edx, 60(%3)\n"
+		" addl $-64, %0\n"
+		" addl $64, %4\n"
+		" addl $64, %3\n"
+		" cmpl $63, %0\n"
+		" ja 0b\n"
+		"5: movl %0, %%eax\n"
+		" shrl $2, %0\n"
+		" andl $3, %%eax\n"
+		" cld\n"
+		"6: rep; movsl\n"
+		" movl %%eax, %0\n"
+		"7: rep; movsb\n"
+		"8:\n"
+		".section .fixup,\"ax\"\n"
+		"9: lea 0(%%eax,%0,4),%0\n"
+		" jmp 8b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		" .align 4\n"
+		" .long 2b,8b\n"
+		" .long 21b,8b\n"
+		" .long 3b,8b\n"
+		" .long 31b,8b\n"
+		" .long 4b,8b\n"
+		" .long 41b,8b\n"
+		" .long 10b,8b\n"
+		" .long 51b,8b\n"
+		" .long 11b,8b\n"
+		" .long 61b,8b\n"
+		" .long 12b,8b\n"
+		" .long 71b,8b\n"
+		" .long 13b,8b\n"
+		" .long 81b,8b\n"
+		" .long 14b,8b\n"
+		" .long 91b,8b\n"
+		" .long 6b,9b\n"
+		" .long 7b,8b\n"
+		".previous"
+		: "=&c"(size), "=&D" (d0), "=&S" (d1)
+		: "1"(to), "2"(from), "0"(size)
+		: "eax", "edx", "memory");
+
+	return size;
+}
+
+unsigned long
+__copy_user_zeroing_int(void *to, const void *from, unsigned long size)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		" .align 2,0x90\n"
+		"0: movl 32(%4), %%eax\n"
+		" cmpl $67, %0\n"
+		" jbe 2f\n"
+		"1: movl 64(%4), %%eax\n"
+		" .align 2,0x90\n"
+		"2: movl 0(%4), %%eax\n"
+		"21: movl 4(%4), %%edx\n"
+		" movl %%eax, 0(%3)\n"
+		" movl %%edx, 4(%3)\n"
+		"3: movl 8(%4), %%eax\n"
+		"31: movl 12(%4),%%edx\n"
+		" movl %%eax, 8(%3)\n"
+		" movl %%edx, 12(%3)\n"
+		"4: movl 16(%4), %%eax\n"
+		"41: movl 20(%4), %%edx\n"
+		" movl %%eax, 16(%3)\n"
+		" movl %%edx, 20(%3)\n"
+		"10: movl 24(%4), %%eax\n"
+		"51: movl 28(%4), %%edx\n"
+		" movl %%eax, 24(%3)\n"
+		" movl %%edx, 28(%3)\n"
+		"11: movl 32(%4), %%eax\n"
+		"61: movl 36(%4), %%edx\n"
+		" movl %%eax, 32(%3)\n"
+		" movl %%edx, 36(%3)\n"
+		"12: movl 40(%4), %%eax\n"
+		"71: movl 44(%4), %%edx\n"
+		" movl %%eax, 40(%3)\n"
+		" movl %%edx, 44(%3)\n"
+		"13: movl 48(%4), %%eax\n"
+		"81: movl 52(%4), %%edx\n"
+		" movl %%eax, 48(%3)\n"
+		" movl %%edx, 52(%3)\n"
+		"14: movl 56(%4), %%eax\n"
+		"91: movl 60(%4), %%edx\n"
+		" movl %%eax, 56(%3)\n"
+		" movl %%edx, 60(%3)\n"
+		" addl $-64, %0\n"
+		" addl $64, %4\n"
+		" addl $64, %3\n"
+		" cmpl $63, %0\n"
+		" ja 0b\n"
+		"5: movl %0, %%eax\n"
+		" shrl $2, %0\n"
+		" andl $3, %%eax\n"
+		" cld\n"
+		"6: rep; movsl\n"
+		" movl %%eax,%0\n"
+		"7: rep; movsb\n"
+		"8:\n"
+		".section .fixup,\"ax\"\n"
+		"9: lea 0(%%eax,%0,4),%0\n"
+		"16: pushl %0\n"
+		" pushl %%eax\n"
+		" xorl %%eax,%%eax\n"
+		" rep; stosb\n"
+		" popl %%eax\n"
+		" popl %0\n"
+		" jmp 8b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		" .align 4\n"
+		" .long 0b,16b\n"
+		" .long 1b,16b\n"
+		" .long 2b,16b\n"
+		" .long 21b,16b\n"
+		" .long 3b,16b\n"
+		" .long 31b,16b\n"
+		" .long 4b,16b\n"
+		" .long 41b,16b\n"
+		" .long 10b,16b\n"
+		" .long 51b,16b\n"
+		" .long 11b,16b\n"
+		" .long 61b,16b\n"
+		" .long 12b,16b\n"
+		" .long 71b,16b\n"
+		" .long 13b,16b\n"
+		" .long 81b,16b\n"
+		" .long 14b,16b\n"
+		" .long 91b,16b\n"
+		" .long 6b,9b\n"
+		" .long 7b,16b\n"
+		".previous"
+		: "=&c"(size), "=&D" (d0), "=&S" (d1)
+		: "1"(to), "2"(from), "0"(size)
+		: "eax", "edx", "memory");
+	return size;
+}
+#endif /* CONFIG_X86_INTEL_USERCOPY */
+
+
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/uaccess.h x/include/asm-i386/uaccess.h
--- x-ref/include/asm-i386/uaccess.h	2003-06-26 01:02:16.000000000 +0200
+++ x/include/asm-i386/uaccess.h	2003-07-17 06:20:53.000000000 +0200
@@ -31,6 +31,33 @@
 #define set_fs(x)	(current->addr_limit = (x))
 
 #define segment_eq(a,b)	((a).seg == (b).seg)
+/*
+ * movsl can be slow when source and dest are not both 8-byte aligned
+ */
+
+#if defined(CONFIG_M586MMX) || defined(CONFIG_M686) || \
+	defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUM4)
+#define CONFIG_X86_INTEL_USERCOPY
+#endif
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+extern struct movsl_mask {
+	int mask;
+} ____cacheline_aligned_in_smp movsl_mask;
+#endif
+
+static inline int movsl_is_ok(const void *a1, const void *a2, unsigned long n)
+{
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if (n >= 64 && (((const long)a1 ^ (const long)a2) & movsl_mask.mask))
+		return 0;
+#endif
+	return 1;
+}
+/* These are undefined on !CONFIG_X86_INTEL_USERCOPY and should be unreferenced. */
+extern unsigned long __copy_user_int(void *, const void *, unsigned long);
+extern unsigned long __copy_user_zeroing_int(void *, const void *, unsigned long);
+
 
 extern int __verify_write(const void *, unsigned long);
 
@@ -427,18 +454,23 @@ do { \
 static inline unsigned long
 __generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
 {
-	__copy_user_zeroing(to,from,n);
-	return n;
+	if (movsl_is_ok(to, from, n))
+		__copy_user_zeroing(to, from, n);
+	else
+		n = __copy_user_zeroing_int(to, from, n);
+	return n;
 }
 
 static inline unsigned long
 __generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
 {
-	__copy_user(to,from,n);
-	return n;
+	if (movsl_is_ok(to, from, n))
+		__copy_user(to, from, n);
+	else
+		n = __copy_user_int(to, from, n);
+	return n;
 }
 
-
 /* Optimize just a little bit when we know the size of the move. */
 #define __constant_copy_user(to, from, size) \
 do { \
@@ -709,9 +741,7 @@ __constant_copy_from_user_nocheck(void *
  * On success, this will be zero.
  */
 #define copy_to_user(to,from,n) \
-	(__builtin_constant_p(n) ? \
-	 __constant_copy_to_user((to),(from),(n)) : \
-	 __generic_copy_to_user((to),(from),(n)))
+	__generic_copy_to_user((to),(from),(n))
 
 /**
  * copy_from_user: - Copy a block of data from user space.
@@ -730,9 +760,7 @@ __constant_copy_from_user_nocheck(void *
  * data to the requested size using zero bytes.
  */
 #define copy_from_user(to,from,n) \
-	(__builtin_constant_p(n) ? \
-	 __constant_copy_from_user((to),(from),(n)) : \
-	 __generic_copy_from_user((to),(from),(n)))
+	__generic_copy_from_user((to),(from),(n))
 
 /**
  * __copy_to_user: - Copy a block of data into user space, with less checking.
@@ -749,9 +777,7 @@ __constant_copy_from_user_nocheck(void *
  * On success, this will be zero.
  */
 #define __copy_to_user(to,from,n) \
-	(__builtin_constant_p(n) ? \
-	 __constant_copy_to_user_nocheck((to),(from),(n)) : \
-	 __generic_copy_to_user_nocheck((to),(from),(n)))
+	__generic_copy_to_user_nocheck((to),(from),(n))
 
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.
@@ -771,9 +797,7 @@ __constant_copy_from_user_nocheck(void *
  * data to the requested size using zero bytes.
  */
 #define __copy_from_user(to,from,n) \
-	(__builtin_constant_p(n) ? \
-	 __constant_copy_from_user_nocheck((to),(from),(n)) : \
-	 __generic_copy_from_user_nocheck((to),(from),(n)))
+	__generic_copy_from_user_nocheck((to),(from),(n))
 
 long strncpy_from_user(char *dst, const char *src, long count);
 long __strncpy_from_user(char *dst, const char *src, long count);
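
For readers skimming the patch, here is a small standalone sketch (not part of the patch, purely illustrative) of the decision movsl_is_ok() makes before the unrolled __copy_user_int()/__copy_user_zeroing_int() routines are used instead of rep;movsl: short copies always keep the movsl path, longer ones only when source and destination agree in their low address bits under the mask (7 for the family 6/7 CPUs handled in init_intel() above). The identifiers demo_mask and movsl_ok_demo are invented for this example.

#include <stdio.h>

static int demo_mask = 7;	/* mirrors movsl_mask.mask as set for c->x86 == 6 or 7 */

/* Same test as movsl_is_ok(): keep rep;movsl for copies shorter than 64 bytes,
 * or when src and dst have identical alignment within the masked low bits. */
static int movsl_ok_demo(const void *dst, const void *src, unsigned long n)
{
	if (n >= 64 && (((unsigned long)dst ^ (unsigned long)src) & demo_mask))
		return 0;	/* mutually misaligned: use the unrolled integer copy */
	return 1;
}

int main(void)
{
	/* prints 1, 0, 1: 0x1000/0x2000 agree mod 8, 0x1004/0x2000 do not,
	 * and a 16-byte copy is below the 64-byte threshold */
	printf("%d\n", movsl_ok_demo((void *)0x1000, (void *)0x2000, 128));
	printf("%d\n", movsl_ok_demo((void *)0x1004, (void *)0x2000, 128));
	printf("%d\n", movsl_ok_demo((void *)0x1004, (void *)0x2000, 16));
	return 0;
}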