aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStas Sergeev <stsp@aknet.ru>2005-03-30 16:32:49 -0800
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-03-30 16:32:49 -0800
commit67d7f3abcdb0e99d278806c75f188e29a1ee5cec (patch)
tree3ba6ffa1964463f4b4aa8721ef23db859f29747c
parent074b4c6141b583bbab5898512312c92d9a779ae9 (diff)
downloadhistory-67d7f3abcdb0e99d278806c75f188e29a1ee5cec.tar.gz
[PATCH] x86: fix ESP corruption CPU bug (take 2)
This works around the corruption of the high word of the ESP register, which is the official bug of x86 CPUs. The bug triggers only when the one is using the 16bit stack segment, and is described here: http://www.intel.com/design/intarch/specupdt/27287402.PDF From: Oleg Nesterov <oleg@tv-sign.ru> I think that Stas tries to steal 1024 bytes from kernel's memory. Acked-by: Linus Torvalds <torvalds@osdl.org> Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz> Acked-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Stas Sergeev <stsp@aknet.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/kernel/cpu/common.c11
-rw-r--r--arch/i386/kernel/entry.S130
-rw-r--r--arch/i386/kernel/head.S2
-rw-r--r--arch/i386/kernel/traps.c45
-rw-r--r--include/asm-i386/desc.h4
-rw-r--r--include/asm-i386/segment.h5
6 files changed, 171 insertions, 26 deletions
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 190e3feeba6d5..ebd5d8247faae 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -21,6 +21,9 @@
DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
static int cachesize_override __initdata = -1;
static int disable_x86_fxsr __initdata = 0;
static int disable_x86_serial_nr __initdata = 1;
@@ -546,6 +549,7 @@ void __init cpu_init (void)
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = &current->thread;
+ __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -568,6 +572,13 @@ void __init cpu_init (void)
*/
memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
GDT_SIZE);
+
+ /* Set up GDT entry for 16bit stack */
+ *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
+ ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+ (CPU_16BIT_STACK_SIZE - 1);
+
cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address =
(unsigned long)&per_cpu(cpu_gdt_table, cpu);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 87aad70d37302..1e45ff292bc99 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -47,6 +47,7 @@
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
+#include <asm/desc.h>
#include "irq_vectors.h"
#define nr_syscalls ((syscall_table_size)/4)
@@ -78,7 +79,7 @@ VM_MASK = 0x00020000
#define preempt_stop cli
#else
#define preempt_stop
-#define resume_kernel restore_all
+#define resume_kernel restore_nocheck
#endif
#define SAVE_ALL \
@@ -122,24 +123,6 @@ VM_MASK = 0x00020000
.previous
-#define RESTORE_ALL \
- RESTORE_REGS \
- addl $4, %esp; \
-1: iret; \
-.section .fixup,"ax"; \
-2: sti; \
- movl $(__USER_DS), %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
- movl $11,%eax; \
- call do_exit; \
-.previous; \
-.section __ex_table,"a";\
- .align 4; \
- .long 1b,2b; \
-.previous
-
-
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
@@ -163,7 +146,7 @@ ret_from_intr:
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
testl $(VM_MASK | 3), %eax
- jz resume_kernel # returning to kernel or vm86-space
+ jz resume_kernel
ENTRY(resume_userspace)
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -178,7 +161,7 @@ ENTRY(resume_userspace)
ENTRY(resume_kernel)
cli
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_all
+ jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
@@ -259,8 +242,56 @@ syscall_exit:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
+
restore_all:
- RESTORE_ALL
+ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
+ movb OLDSS(%esp), %ah
+ movb CS(%esp), %al
+ andl $(VM_MASK | (4 << 8) | 3), %eax
+ cmpl $((4 << 8) | 3), %eax
+ je ldt_ss # returning to user-space with LDT SS
+restore_nocheck:
+ RESTORE_REGS
+ addl $4, %esp
+1: iret
+.section .fixup,"ax"
+iret_exc:
+ sti
+ movl $__USER_DS, %edx
+ movl %edx, %ds
+ movl %edx, %es
+ movl $11,%eax
+ call do_exit
+.previous
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+
+ldt_ss:
+ larl OLDSS(%esp), %eax
+ jnz restore_nocheck
+ testl $0x00400000, %eax # returning to 32bit stack?
+ jnz restore_nocheck # allright, normal return
+ /* If returning to userspace with 16bit stack,
+ * try to fix the higher word of ESP, as the CPU
+ * won't restore it.
+ * This is an "official" bug of all the x86-compatible
+ * CPUs, which we can try to work around to make
+ * dosemu and wine happy. */
+ subl $8, %esp # reserve space for switch16 pointer
+ cli
+ movl %esp, %eax
+ /* Set up the 16bit stack frame with switch32 pointer on top,
+ * and a switch16 pointer on top of the current frame. */
+ call setup_x86_bogus_stack
+ RESTORE_REGS
+ lss 20+4(%esp), %esp # switch to 16bit stack
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
# perform work that needs to be done immediately before resumption
ALIGN
@@ -336,6 +367,27 @@ syscall_badsys:
movl $-ENOSYS,EAX(%esp)
jmp resume_userspace
+#define FIXUP_ESPFIX_STACK \
+ movl %esp, %eax; \
+ /* switch to 32bit stack using the pointer on top of 16bit stack */ \
+ lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+ /* copy data from 16bit stack to 32bit stack */ \
+ call fixup_x86_bogus_stack; \
+ /* put ESP to the proper location */ \
+ movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+ pushl %eax; \
+ movl %ss, %eax; \
+ /* see if on 16bit stack */ \
+ cmpw $__ESPFIX_SS, %ax; \
+ jne 28f; \
+ movl $__KERNEL_DS, %edx; \
+ movl %edx, %ds; \
+ movl %edx, %es; \
+ /* switch to 32bit stack */ \
+ FIXUP_ESPFIX_STACK \
+28: popl %eax;
+
/*
* Build the entry stubs and pointer table with
* some assembler magic.
@@ -390,7 +442,9 @@ error_code:
pushl %ecx
pushl %ebx
cld
- movl %es, %ecx
+ pushl %es
+ UNWIND_ESPFIX_STACK
+ popl %ecx
movl ES(%esp), %edi # get the function address
movl ORIG_EAX(%esp), %edx # get the error code
movl %eax, ORIG_EAX(%esp)
@@ -472,6 +526,11 @@ debug_stack_correct:
* fault happened on the sysenter path.
*/
ENTRY(nmi)
+ pushl %eax
+ movl %ss, %eax
+ cmpw $__ESPFIX_SS, %ax
+ popl %eax
+ je nmi_16bit_stack
cmpl $sysenter_entry,(%esp)
je nmi_stack_fixup
pushl %eax
@@ -491,7 +550,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- RESTORE_ALL
+ jmp restore_all
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
@@ -507,6 +566,29 @@ nmi_debug_stack_fixup:
FIX_STACK(24,nmi_stack_correct, 1)
jmp nmi_stack_correct
+nmi_16bit_stack:
+ /* create the pointer to lss back */
+ pushl %ss
+ pushl %esp
+ movzwl %sp, %esp
+ addw $4, (%esp)
+ /* copy the iret frame of 12 bytes */
+ .rept 3
+ pushl 16(%esp)
+ .endr
+ pushl %eax
+ SAVE_ALL
+ FIXUP_ESPFIX_STACK # %eax == %esp
+ xorl %edx,%edx # zero error code
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to 16bit stack
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+
ENTRY(int3)
pushl $-1 # mark this as an int
SAVE_ALL
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index ea1ef3a32ec5b..d273fd7461921 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -512,7 +512,7 @@ ENTRY(cpu_gdt_table)
.quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0xc8 APM DS data */
- .quad 0x0000000000000000 /* 0xd0 - unused */
+ .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */
.quad 0x0000000000000000 /* 0xd8 - unused */
.quad 0x0000000000000000 /* 0xe0 - unused */
.quad 0x0000000000000000 /* 0xe8 - unused */
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7b5b97f209b7f..6c0e383915b6a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -887,6 +887,51 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
#endif
}
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+ unsigned long *switch16_ptr, *switch32_ptr;
+ struct pt_regs *regs;
+ unsigned long stack_top, stack_bot;
+ unsigned short iret_frame16_off;
+ int cpu = smp_processor_id();
+ /* reserve the space on 32bit stack for the magic switch16 pointer */
+ memmove(stk, stk + 8, sizeof(struct pt_regs));
+ switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+ regs = (struct pt_regs *)stk;
+ /* now the switch32 on 16bit stack */
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+ /* copy iret frame on 16bit stack */
+ memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+ /* fill in the switch pointers */
+ switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+ switch16_ptr[1] = __ESPFIX_SS;
+ switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+ 8 - CPU_16BIT_STACK_SIZE;
+ switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+ unsigned long *switch32_ptr;
+ unsigned char *stack16, *stack32;
+ unsigned long stack_top, stack_bot;
+ int len;
+ int cpu = smp_processor_id();
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ /* copy the data from 16bit stack to 32bit stack */
+ len = CPU_16BIT_STACK_SIZE - 8 - sp;
+ stack16 = (unsigned char *)(stack_bot + sp);
+ stack32 = (unsigned char *)
+ (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+ memcpy(stack32, stack16, len);
+ return stack32;
+}
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index fb8cdcf41d4b5..11e67811a9901 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -4,6 +4,8 @@
#include <asm/ldt.h>
#include <asm/segment.h>
+#define CPU_16BIT_STACK_SIZE 1024
+
#ifndef __ASSEMBLY__
#include <linux/preempt.h>
@@ -15,6 +17,8 @@
extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
DECLARE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+
struct Xgt_desc_struct {
unsigned short size;
unsigned long address __attribute__((packed));
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h
index abe3440a93ceb..bb5ff5b2c02ec 100644
--- a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -38,7 +38,7 @@
* 24 - APM BIOS support
* 25 - APM BIOS support
*
- * 26 - unused
+ * 26 - ESPFIX small SS
* 27 - unused
* 28 - unused
* 29 - unused
@@ -71,6 +71,9 @@
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
+#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*