From: Hugh Dickins

Repeated -j3 kernel builds, run in tandem on dual PIII, have been
collapsing recently on -mm with 4G/4G split, SMP and preemption.
Typically 'make' fails with Error 139 because 'as' or another got
SIGSEGV; maybe within ten minutes, maybe after ten hours.

This patch seems to fix that (ran successfully overnight on test4-mm1,
will run over the weekend on test4-mm3-1).  Please cast a critical eye
over it, I expect Ingo or someone else will find it can be improved.

The problem is that a task may be preempted just after it has entered
kernelspace, while using the transitional "virtual stack" i.e. %esp
pointing to high per-cpu kmap of the kernel stack.  If the task resumes
on another cpu, that %esp needs to be repointed into the new cpu's kmap.

The corresponding returns to userspace look okay to me: interrupts are
disabled over the critical points.  And in general no copy is taken of
%esp while on the virtual stack e.g. setting pointer to pt_regs is and
must be done after switching to real stack.  But there's one place in
__SWITCH_KERNELSPACE itself where we need to check and repeat if moved.

 arch/i386/kernel/entry.S   |   17 ++++++++++++++++-
 arch/i386/kernel/process.c |   20 +++++++++++++++++---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff -puN arch/i386/kernel/entry.S~4g4g-preempt-vstack-fix arch/i386/kernel/entry.S
--- 25/arch/i386/kernel/entry.S~4g4g-preempt-vstack-fix	2003-08-30 15:42:11.000000000 -0700
+++ 25-akpm/arch/i386/kernel/entry.S	2003-08-30 15:42:11.000000000 -0700
@@ -103,6 +103,20 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 
 #ifdef CONFIG_X86_SWITCH_PAGETABLES
 
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+/*
+ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu,
+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
+ * left stale, so we must check whether to repeat the real stack calculation.
+ */
+#define repeat_if_esp_changed				\
+	xorl %esp, %ebp;				\
+	testl $0xffffe000, %ebp;			\
+	jnz 0b
+#else
+#define repeat_if_esp_changed
+#endif
+
 /* clobbers ebx, edx and ebp */
 
 #define __SWITCH_KERNELSPACE				\
@@ -117,12 +131,13 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 		movl $swapper_pg_dir-__PAGE_OFFSET, %edx;	\
 								\
 		/* GET_THREAD_INFO(%ebp) intermixed */		\
-								\
+0:								\
 		movl %esp, %ebp;				\
 		movl %esp, %ebx;				\
 		andl $0xffffe000, %ebp;				\
 		andl $0x00001fff, %ebx;				\
 		orl TI_real_stack(%ebp), %ebx;			\
+		repeat_if_esp_changed;				\
 								\
 		movl %edx, %cr3;				\
 		movl %ebx, %esp;				\

diff -puN arch/i386/kernel/process.c~4g4g-preempt-vstack-fix arch/i386/kernel/process.c
--- 25/arch/i386/kernel/process.c~4g4g-preempt-vstack-fix	2003-08-30 15:42:11.000000000 -0700
+++ 25-akpm/arch/i386/kernel/process.c	2003-08-30 15:42:11.000000000 -0700
@@ -479,13 +479,27 @@ struct task_struct * __switch_to(struct
 		__kmap_atomic(next->stack_page1, KM_VSTACK1);
 		/*
-		 * Reload esp0:
-		 */
-		/*
 		 * NOTE: here we rely on the task being the stack as well
 		 */
 		next_p->thread_info->virtual_stack =
 			(void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+		/*
+		 * If next was preempted on entry from userspace to kernel,
+		 * and now it's on a different cpu, we need to adjust %esp.
+		 * This assumes that entry.S does not copy %esp while on the
+		 * virtual stack (with interrupts enabled): which is so,
+		 * except within __SWITCH_KERNELSPACE itself.
+		 */
+		if (unlikely(next->esp >= TASK_SIZE)) {
+			next->esp &= THREAD_SIZE - 1;
+			next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
+		}
+#endif
 #endif
+	/*
+	 * Reload esp0:
+	 */
 	load_esp0(tss, virtual_esp0(next_p));
 
 	/*
_
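
For anyone puzzling over the masks in the entry.S hunk, here is a
minimal userspace C sketch (not kernel code) of the check-and-repeat
logic.  read_esp() and real_stack_base() are made-up stand-ins for
reading %esp and TI_real_stack(%ebp), and the addresses are invented;
only the mask arithmetic matches the patch, with 0xffffe000 being
~(THREAD_SIZE-1) and 0x00001fff being THREAD_SIZE-1 for 8K stacks.

#include <stdio.h>

#define THREAD_SIZE 0x2000UL			/* 8K kernel stacks */

static unsigned long simulated_esp = 0xffc01f40UL;	/* invented vstack esp */

static unsigned long read_esp(void)		/* stands in for reading %esp */
{
	return simulated_esp;
}

static unsigned long real_stack_base(unsigned long vbase)
{						/* stands in for TI_real_stack */
	(void)vbase;
	return 0xc1234000UL;			/* invented lowmem stack base */
}

int main(void)
{
	unsigned long esp, vbase, rebased;

	do {
		esp = read_esp();
		vbase = esp & ~(THREAD_SIZE - 1);	/* andl $0xffffe000, %ebp */
		rebased = (esp & (THREAD_SIZE - 1))	/* andl $0x00001fff, %ebx */
			| real_stack_base(vbase);	/* orl TI_real_stack(%ebp), %ebx */
		/*
		 * repeat_if_esp_changed: if a preemption repointed %esp to
		 * another cpu's kmap meanwhile, its 8K-aligned base no longer
		 * matches our snapshot, so redo the calculation.
		 */
	} while ((read_esp() ^ vbase) & ~(THREAD_SIZE - 1));

	printf("virtual esp %#lx -> real esp %#lx\n", esp, rebased);
	return 0;
}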
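
And a similar sketch of the __switch_to side of the fix: if the saved
esp still points into a per-cpu kmap of the stack (above TASK_SIZE in
the 4G/4G layout), keep only its offset within the stack and rebase it
onto the new cpu's kmap.  The constants and addresses below are
illustrative, not the real 4G/4G values.

#include <stdio.h>

#define THREAD_SIZE 0x2000UL
#define TASK_SIZE 0xff000000UL		/* illustrative, not the real limit */

static unsigned long rebase_esp(unsigned long esp, unsigned long new_vstack)
{
	if (esp >= TASK_SIZE) {		/* still on a virtual stack? */
		esp &= THREAD_SIZE - 1;	/* keep offset within the 8K stack */
		esp |= new_vstack;	/* new cpu's kmap of the same stack */
	}
	return esp;			/* a lowmem esp passes through untouched */
}

int main(void)
{
	unsigned long old_esp = 0xff402a80UL;	/* invented: old cpu's kmap */
	unsigned long new_vstack = 0xff600000UL;/* invented: new cpu's kmap */

	printf("%#lx -> %#lx\n", old_esp, rebase_esp(old_esp, new_vstack));
	return 0;
}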