diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/i386_ksyms.c x/arch/i386/kernel/i386_ksyms.c
--- x-ref/arch/i386/kernel/i386_ksyms.c	2003-06-13 22:07:23.000000000 +0200
+++ x/arch/i386/kernel/i386_ksyms.c	2003-07-17 06:19:34.000000000 +0200
@@ -107,6 +107,12 @@ EXPORT_SYMBOL(__generic_copy_from_user);
 EXPORT_SYMBOL(__generic_copy_to_user);
 EXPORT_SYMBOL(strnlen_user);
 
+#ifdef CONFIG_X86_INTEL_USERCOPY
+EXPORT_SYMBOL(movsl_mask);
+EXPORT_SYMBOL(__copy_user_int);
+EXPORT_SYMBOL(__copy_user_zeroing_int);
+#endif
+
 EXPORT_SYMBOL(pci_alloc_consistent);
 EXPORT_SYMBOL(pci_free_consistent);
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/setup.c x/arch/i386/kernel/setup.c
--- x-ref/arch/i386/kernel/setup.c	2003-07-17 06:16:35.000000000 +0200
+++ x/arch/i386/kernel/setup.c	2003-07-17 06:19:34.000000000 +0200
@@ -156,6 +156,13 @@ unsigned long pci_mem_start = 0x10000000
 static unsigned int highmem_pages __initdata = -1;
 
 /*
+ * Alignment at which movsl is preferred for bulk memory copies
+ */
+#ifdef CONFIG_X86_INTEL_USERCOPY
+struct movsl_mask movsl_mask;
+#endif
+
+/*
  * Setup options
  */
 struct drive_info_struct { char dummy[32]; } drive_info;
@@ -2451,6 +2458,23 @@ static void __init init_intel(struct cpu
 
 	}
 #endif
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	/*
+	 * Set up the preferred alignment for movsl bulk memory moves
+	 */
+	switch (c->x86) {
+	case 4:		/* 486: untested */
+		break;
+	case 5:		/* Old Pentia: untested */
+		break;
+	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	case 7:		/* P4 is OK down to 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	}
+#endif
 }
 
 void __init get_cpu_vendor(struct cpuinfo_x86 *c)
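
For illustration only (not part of the patch): a standalone userspace sketch of the heuristic that the mask set above feeds into (movsl_is_ok() in uaccess.h, added later in this patch). The test is on the XOR of the two addresses, so what matters is whether source and destination share the same offset modulo 8, not their absolute alignment.

#include <stdio.h>

/* Same shape as the kernel structure; 7 is the value init_intel()
 * picks for family 6 and family 7 CPUs. */
static struct movsl_mask { int mask; } movsl_mask = { 7 };

static int movsl_is_ok(const void *a1, const void *a2, unsigned long n)
{
	if (n >= 64 && (((long)a1 ^ (long)a2) & movsl_mask.mask))
		return 0;	/* low bits differ: take the unrolled integer copy */
	return 1;		/* co-aligned or short copy: rep movsl is fine */
}

int main(void)
{
	char buf[256];

	printf("%d\n", movsl_is_ok(buf,     buf + 8,  128));	/* 1: offsets equal mod 8 */
	printf("%d\n", movsl_is_ok(buf + 3, buf + 11, 128));	/* 1: both off by 3 mod 8 */
	printf("%d\n", movsl_is_ok(buf,     buf + 4,  128));	/* 0: offsets differ mod 8 */
	printf("%d\n", movsl_is_ok(buf,     buf + 4,  32));	/* 1: under 64 bytes */
	return 0;
}
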
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/lib/usercopy.c x/arch/i386/lib/usercopy.c
--- x-ref/arch/i386/lib/usercopy.c	2003-06-13 22:07:23.000000000 +0200
+++ x/arch/i386/lib/usercopy.c	2003-07-17 06:19:34.000000000 +0200
@@ -45,8 +45,12 @@ unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
 {
 	prefetch(from);
-	if (access_ok(VERIFY_WRITE, to, n))
-		__copy_user(to,from,n);
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		if (movsl_is_ok(to, from, n))
+			__copy_user(to, from, n);
+		else
+			n = __copy_user_int(to, from, n);
+	}
 	return n;
 }
 
@@ -54,10 +58,14 @@ unsigned long
 __generic_copy_from_user(void *to, const void *from, unsigned long n)
 {
 	prefetchw(to);
-	if (access_ok(VERIFY_READ, from, n))
-		__copy_user_zeroing(to,from,n);
-	else
-		memset(to, 0, n);
+	if (access_ok(VERIFY_READ, from, n)) {
+		if (movsl_is_ok(to, from, n))
+			__copy_user_zeroing(to, from, n);
+		else
+			n = __copy_user_zeroing_int(to, from, n);
+	} else {
+		memset(to, 0, n);
+	}
 	return n;
 }
 
@@ -252,3 +260,192 @@ long strnlen_user(const char *s, long n)
 		:"cc");
 	return res & mask;
 }
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy.  */
+unsigned long __copy_user_int(void *to, const void *from, unsigned long size)
+{
+                int d0, d1;
+                __asm__ __volatile__(
+                                       "       .align 2,0x90\n"
+                                       "0:     movl 32(%4), %%eax\n"
+                                       "       cmpl $67, %0\n"
+                                       "       jbe 1f\n"
+                                       "       movl 64(%4), %%eax\n"
+                                       "       .align 2,0x90\n"
+                                       "1:     movl 0(%4), %%eax\n"
+                                       "       movl 4(%4), %%edx\n"
+                                       "2:     movl %%eax, 0(%3)\n"
+                                       "21:    movl %%edx, 4(%3)\n"
+                                       "       movl 8(%4), %%eax\n"
+                                       "       movl 12(%4),%%edx\n"
+                                       "3:     movl %%eax, 8(%3)\n"
+                                       "31:    movl %%edx, 12(%3)\n"
+                                       "       movl 16(%4), %%eax\n"
+                                       "       movl 20(%4), %%edx\n"
+                                       "4:     movl %%eax, 16(%3)\n"
+                                       "41:    movl %%edx, 20(%3)\n"
+                                       "       movl 24(%4), %%eax\n"
+                                       "       movl 28(%4), %%edx\n"
+                                       "10:    movl %%eax, 24(%3)\n"
+                                       "51:    movl %%edx, 28(%3)\n"
+                                       "       movl 32(%4), %%eax\n"
+                                       "       movl 36(%4), %%edx\n"
+                                       "11:    movl %%eax, 32(%3)\n"
+                                       "61:    movl %%edx, 36(%3)\n"
+                                       "       movl 40(%4), %%eax\n"
+                                       "       movl 44(%4), %%edx\n"
+                                       "12:    movl %%eax, 40(%3)\n"
+                                       "71:    movl %%edx, 44(%3)\n"
+                                       "       movl 48(%4), %%eax\n"
+                                       "       movl 52(%4), %%edx\n"
+                                       "13:    movl %%eax, 48(%3)\n"
+                                       "81:    movl %%edx, 52(%3)\n"
+                                       "       movl 56(%4), %%eax\n"
+                                       "       movl 60(%4), %%edx\n"
+                                       "14:    movl %%eax, 56(%3)\n"
+                                       "91:    movl %%edx, 60(%3)\n"
+                                       "       addl $-64, %0\n"
+                                       "       addl $64, %4\n"
+                                       "       addl $64, %3\n"
+                                       "       cmpl $63, %0\n"
+                                       "       ja  0b\n"
+                                       "5:     movl  %0, %%eax\n"
+                                       "       shrl  $2, %0\n"
+                                       "       andl  $3, %%eax\n"
+                                       "       cld\n"
+                                       "6:     rep; movsl\n"
+                                       "       movl %%eax, %0\n"
+                                       "7:     rep; movsb\n"
+                                       "8:\n"
+                                       ".section .fixup,\"ax\"\n"
+                                       "9:     lea 0(%%eax,%0,4),%0\n"
+                                       "       jmp 8b\n"
+                                       ".previous\n"
+                                       ".section __ex_table,\"a\"\n"
+                                       "       .align 4\n"
+                                       "       .long 2b,8b\n"
+                                       "       .long 21b,8b\n"
+                                       "       .long 3b,8b\n"
+                                       "       .long 31b,8b\n"
+                                       "       .long 4b,8b\n"
+                                       "       .long 41b,8b\n"
+                                       "       .long 10b,8b\n"
+                                       "       .long 51b,8b\n"
+                                       "       .long 11b,8b\n"
+                                       "       .long 61b,8b\n"
+                                       "       .long 12b,8b\n"
+                                       "       .long 71b,8b\n"
+                                       "       .long 13b,8b\n"
+                                       "       .long 81b,8b\n"
+                                       "       .long 14b,8b\n"
+                                       "       .long 91b,8b\n"
+                                       "       .long 6b,9b\n"
+                                       "       .long 7b,8b\n"
+                                       ".previous"
+                                       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+                                       :  "1"(to), "2"(from), "0"(size)
+                                       : "eax", "edx", "memory");
+
+                return size;
+}
+
+unsigned long
+__copy_user_zeroing_int(void *to, const void *from, unsigned long size)
+{
+                int d0, d1;
+                __asm__ __volatile__(
+                                       "        .align 2,0x90\n"
+                                       "0:      movl 32(%4), %%eax\n"
+                                       "        cmpl $67, %0\n"
+                                       "        jbe 2f\n"
+                                       "1:      movl 64(%4), %%eax\n"
+                                       "        .align 2,0x90\n"
+                                       "2:      movl 0(%4), %%eax\n"
+                                       "21:     movl 4(%4), %%edx\n"
+                                       "        movl %%eax, 0(%3)\n"
+                                       "        movl %%edx, 4(%3)\n"
+                                       "3:      movl 8(%4), %%eax\n"
+                                       "31:     movl 12(%4),%%edx\n"
+                                       "        movl %%eax, 8(%3)\n"
+                                       "        movl %%edx, 12(%3)\n"
+                                       "4:      movl 16(%4), %%eax\n"
+                                       "41:     movl 20(%4), %%edx\n"
+                                       "        movl %%eax, 16(%3)\n"
+                                       "        movl %%edx, 20(%3)\n"
+                                       "10:     movl 24(%4), %%eax\n"
+                                       "51:     movl 28(%4), %%edx\n"
+                                       "        movl %%eax, 24(%3)\n"
+                                       "        movl %%edx, 28(%3)\n"
+                                       "11:     movl 32(%4), %%eax\n"
+                                       "61:     movl 36(%4), %%edx\n"
+                                       "        movl %%eax, 32(%3)\n"
+                                       "        movl %%edx, 36(%3)\n"
+                                       "12:     movl 40(%4), %%eax\n"
+                                       "71:     movl 44(%4), %%edx\n"
+                                       "        movl %%eax, 40(%3)\n"
+                                       "        movl %%edx, 44(%3)\n"
+                                       "13:     movl 48(%4), %%eax\n"
+                                       "81:     movl 52(%4), %%edx\n"
+                                       "        movl %%eax, 48(%3)\n"
+                                       "        movl %%edx, 52(%3)\n"
+                                       "14:     movl 56(%4), %%eax\n"
+                                       "91:     movl 60(%4), %%edx\n"
+                                       "        movl %%eax, 56(%3)\n"
+                                       "        movl %%edx, 60(%3)\n"
+                                       "        addl $-64, %0\n"
+                                       "        addl $64, %4\n"
+                                       "        addl $64, %3\n"
+                                       "        cmpl $63, %0\n"
+                                       "        ja  0b\n"
+                                       "5:      movl  %0, %%eax\n"
+                                       "        shrl  $2, %0\n"
+                                       "        andl $3, %%eax\n"
+                                       "        cld\n"
+                                       "6:      rep; movsl\n"
+                                       "        movl %%eax,%0\n"
+                                       "7:      rep; movsb\n"
+                                       "8:\n"
+                                       ".section .fixup,\"ax\"\n"
+                                       "9:      lea 0(%%eax,%0,4),%0\n"
+                                       "16:     pushl %0\n"
+                                       "        pushl %%eax\n"
+                                       "        xorl %%eax,%%eax\n"
+                                       "        rep; stosb\n"
+                                       "        popl %%eax\n"
+                                       "        popl %0\n"
+                                       "        jmp 8b\n"
+                                       ".previous\n"
+                                       ".section __ex_table,\"a\"\n"
+                                       "                .align 4\n"
+                                       "                .long 0b,16b\n"
+                                       "                .long 1b,16b\n"
+                                       "                .long 2b,16b\n"
+                                       "                .long 21b,16b\n"
+                                       "                .long 3b,16b\n"
+                                       "                .long 31b,16b\n"
+                                       "                .long 4b,16b\n"
+                                       "                .long 41b,16b\n"
+                                       "                .long 10b,16b\n"
+                                       "                .long 51b,16b\n"
+                                       "                .long 11b,16b\n"
+                                       "                .long 61b,16b\n"
+                                       "                .long 12b,16b\n"
+                                       "                .long 71b,16b\n"
+                                       "                .long 13b,16b\n"
+                                       "                .long 81b,16b\n"
+                                       "                .long 14b,16b\n"
+                                       "                .long 91b,16b\n"
+                                       "                .long 6b,9b\n"
+                                       "                .long 7b,16b\n"
+                                       ".previous"
+                                       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+                                       :  "1"(to), "2"(from), "0"(size)
+                                       : "eax", "edx", "memory");
+                return size;
+}
+#endif /* CONFIG_X86_INTEL_USERCOPY */
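
For illustration only (not part of the patch): a plain-C sketch of the copy strategy that __copy_user_int()'s inline assembly implements. The real routine also touches the source 32 and 64 bytes ahead as a software prefetch, and relies on the __ex_table fixups to resume after a fault and return the number of bytes left uncopied; neither is expressible in plain C.

#include <string.h>

static unsigned long copy_user_int_sketch(void *to, const void *from,
					  unsigned long size)
{
	unsigned char *d = to;
	const unsigned char *s = from;

	while (size > 63) {		/* unrolled main loop: 64 bytes per pass */
		memcpy(d, s, 64);	/* done as 16 paired movl's in the asm */
		d += 64;
		s += 64;
		size -= 64;
	}
	while (size >= 4) {		/* the "rep; movsl" step */
		*(unsigned int *)d = *(const unsigned int *)s;
		d += 4;
		s += 4;
		size -= 4;
	}
	while (size--)			/* the "rep; movsb" byte tail */
		*d++ = *s++;
	return 0;			/* bytes not copied (no fault possible here) */
}
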
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/uaccess.h x/include/asm-i386/uaccess.h
--- x-ref/include/asm-i386/uaccess.h	2003-06-26 01:02:16.000000000 +0200
+++ x/include/asm-i386/uaccess.h	2003-07-17 06:20:53.000000000 +0200
@@ -31,6 +31,33 @@
 #define set_fs(x)	(current->addr_limit = (x))
 
 #define segment_eq(a,b)	((a).seg == (b).seg)
+/*
+ * movsl can be slow when source and dest are not both 8-byte aligned
+ */
+
+#if defined(CONFIG_M586MMX) || defined(CONFIG_M686) || \
+	defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUM4)
+#define CONFIG_X86_INTEL_USERCOPY
+#endif
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+extern struct movsl_mask {
+	int mask;
+} ____cacheline_aligned_in_smp movsl_mask;
+#endif
+
+static inline int movsl_is_ok(const void *a1, const void *a2, unsigned long n)
+{
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if (n >= 64 && (((long)a1 ^ (long)a2) & movsl_mask.mask))
+		return 0;
+#endif
+	return 1;
+}
+/* These are undefined when CONFIG_X86_INTEL_USERCOPY is not set, and should then be unreferenced. */
+extern unsigned long __copy_user_int(void *, const void *, unsigned long);
+extern unsigned long __copy_user_zeroing_int(void *, const void *, unsigned long);
+
 
 extern int __verify_write(const void *, unsigned long);
 
@@ -427,18 +454,23 @@ do {									\
 static inline unsigned long
 __generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
 {
-	__copy_user_zeroing(to,from,n);
-	return n;
+	if (movsl_is_ok(to, from, n))
+		__copy_user_zeroing(to, from, n);
+	else
+		n = __copy_user_zeroing_int(to, from, n);
+	return n;
 }
 
 static inline unsigned long
 __generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
 {
-	__copy_user(to,from,n);
-	return n;
+	if (movsl_is_ok(to, from, n))
+		__copy_user(to, from, n);
+	else
+		n = __copy_user_int(to, from, n);
+	return n;
 }
 
-
 /* Optimize just a little bit when we know the size of the move. */
 #define __constant_copy_user(to, from, size)			\
 do {								\
@@ -709,9 +741,7 @@ __constant_copy_from_user_nocheck(void *
  * On success, this will be zero.
  */
 #define copy_to_user(to,from,n)				\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_to_user((to),(from),(n)) :	\
-	 __generic_copy_to_user((to),(from),(n)))
+	 __generic_copy_to_user((to),(from),(n))
 
 /**
  * copy_from_user: - Copy a block of data from user space.
@@ -730,9 +760,7 @@ __constant_copy_from_user_nocheck(void *
  * data to the requested size using zero bytes.
  */
 #define copy_from_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_from_user((to),(from),(n)) :	\
-	 __generic_copy_from_user((to),(from),(n)))
+	 __generic_copy_from_user((to),(from),(n))
 
 /**
  * __copy_to_user: - Copy a block of data into user space, with less checking.
@@ -749,9 +777,7 @@ __constant_copy_from_user_nocheck(void *
  * On success, this will be zero.
  */
 #define __copy_to_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_to_user_nocheck((to),(from),(n)) :	\
-	 __generic_copy_to_user_nocheck((to),(from),(n)))
+	 __generic_copy_to_user_nocheck((to),(from),(n))
 
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.
@@ -771,9 +797,7 @@ __constant_copy_from_user_nocheck(void *
  * data to the requested size using zero bytes.
  */
 #define __copy_from_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_from_user_nocheck((to),(from),(n)) :	\
-	 __generic_copy_from_user_nocheck((to),(from),(n)))
+	 __generic_copy_from_user_nocheck((to),(from),(n))
 
 long strncpy_from_user(char *dst, const char *src, long count);
 long __strncpy_from_user(char *dst, const char *src, long count);
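
For illustration only (not part of the patch; the struct and function below are made up): with the __builtin_constant_p() dispatch removed, even a compile-time-constant size now goes through __generic_copy_to_user()/__generic_copy_from_user(), where access_ok() and the movsl_is_ok() heuristic are applied at run time.

#include <linux/errno.h>
#include <asm/uaccess.h>

struct if_stats {
	unsigned long rx_packets;
	unsigned long tx_packets;
};

/* sizeof(*dst) is a constant, but copy_to_user() no longer expands to
 * __constant_copy_to_user(); it always calls __generic_copy_to_user() now. */
static int report_stats(struct if_stats *dst, const struct if_stats *src)
{
	if (copy_to_user(dst, src, sizeof(*dst)))
		return -EFAULT;
	return 0;
}
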