diff -u linux/arch/i386/lib/checksum.S-o linux/arch/i386/lib/checksum.S
--- linux/arch/i386/lib/checksum.S-o	2003-03-07 16:48:01.000000000 +0100
+++ linux/arch/i386/lib/checksum.S	2003-10-01 14:01:31.000000000 +0200
@@ -48,6 +48,9 @@
 	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 	   * alignment for the unrolled loop.
+	   *
+	   * Danger, Will Robinson: with sendfile, 2-byte alignment is not guaranteed.
+	   *
 	   */		
 csum_partial:	
 	pushl %esi
@@ -237,18 +240,37 @@
 	movl $0xffffff,%ebx	# by the shll and shrl instructions
 	shll $3,%ecx
 	shrl %cl,%ebx
-	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
+.Ltail:	
+	andl -128(%esi),%ebx
+.Ltail_finished:
 	addl %ebx,%eax
 	adcl $0,%eax
 80: 
 	testl $1, 12(%esp)
 	jz 90f
 	roll $8, %eax
-90: 
+90:  
 	popl %ebx
 	popl %esi
 	ret
-				
+	
+	.section __ex_table,"a"
+	.long .Ltail,tail_recover
+	.long .Ltail_byte3,.Ltail_byte1
+	.long .Ltail_byte2,.Ltail_mask
+	.previous
+
+	/* Fixup for a faulting 4-byte tail load: gather the bytes one at a
+	 * time in %ecx, then apply the length mask still held in %ebx so
+	 * readable bytes past the end of the buffer are not summed. */
+tail_recover:	xorl %ecx,%ecx		# %ecx is not read again after the shrl
+.Ltail_byte3:	movb -126(%esi),%cl	# third byte; on fault it stays 0
+	shll $16,%ecx
+.Ltail_byte1:	movb -128(%esi),%cl
+.Ltail_byte2:	movb -127(%esi),%ch	# on fault, bits 8-15 stay 0
+.Ltail_mask:	andl %ecx,%ebx		# %ebx = mask & gathered bytes
+	jmp .Ltail_finished
+
 #endif
 
 /*