From: Hugh Dickins Fixes bugzilla #2219 fork's dup_mmap leaves child mm_rb as copied from parent mm while doing all the copy_page_ranges, and then calls build_mmap_rb without holding page_table_lock. try_to_unmap_one's find_vma (holding page_table_lock not mmap_sem) coming on another cpu may cause mm mayhem. It may leave the child's mmap_cache pointing to a vma of the parent mm. When the parent exits and the child faults, quite what happens rather depends on what junk then inhabits vm_page_prot, which gets set in the page table, with page_add_rmap adding the ptep, but junk pte likely to fail the tests for page_remove_rmap. Eventually the child exits, the page table is freed and try_to_unmap_one oopses on null ptep_to_mm (but in a kernel with rss limiting, usually page_referenced hits the null ptep_to_mm first). This took me days and days to unravel! Big thanks to Matthieu for reporting it with a good test case. --- include/linux/mm.h | 3 ++- kernel/fork.c | 13 +++++++++++-- mm/mmap.c | 20 ++------------------ 3 files changed, 15 insertions(+), 21 deletions(-) diff -puN include/linux/mm.h~vma-corruption-fix include/linux/mm.h --- 25/include/linux/mm.h~vma-corruption-fix 2004-03-06 18:37:39.000000000 -0800 +++ 25-akpm/include/linux/mm.h 2004-03-06 18:37:40.000000000 -0800 @@ -530,7 +530,8 @@ extern void si_meminfo_node(struct sysin /* mmap.c */ extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void build_mmap_rb(struct mm_struct *); +extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, + struct rb_node **, struct rb_node *); extern void exit_mmap(struct mm_struct *); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); diff -puN kernel/fork.c~vma-corruption-fix kernel/fork.c --- 25/kernel/fork.c~vma-corruption-fix 2004-03-06 18:37:39.000000000 -0800 +++ 25-akpm/kernel/fork.c 2004-03-06 18:37:40.000000000 -0800 @@ -265,6 +265,7 @@ static struct task_struct *dup_task_stru static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) { struct vm_area_struct * mpnt, *tmp, **pprev; + struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge = 0; @@ -277,6 +278,9 @@ static inline int dup_mmap(struct mm_str mm->map_count = 0; mm->rss = 0; cpus_clear(mm->cpu_vm_mask); + mm->mm_rb = RB_ROOT; + rb_link = &mm->mm_rb.rb_node; + rb_parent = NULL; pprev = &mm->mmap; /* @@ -324,11 +328,17 @@ static inline int dup_mmap(struct mm_str /* * Link in the new vma and copy the page table entries: - * link in first so that swapoff can see swap entries. + * link in first so that swapoff can see swap entries, + * and try_to_unmap_one's find_vma find the new vma. */ spin_lock(&mm->page_table_lock); *pprev = tmp; pprev = &tmp->vm_next; + + __vma_link_rb(mm, tmp, rb_link, rb_parent); + rb_link = &tmp->vm_rb.rb_right; + rb_parent = &tmp->vm_rb; + mm->map_count++; retval = copy_page_range(mm, current->mm, tmp); spin_unlock(&mm->page_table_lock); @@ -340,7 +350,6 @@ static inline int dup_mmap(struct mm_str goto fail; } retval = 0; - build_mmap_rb(mm); out: flush_tlb_mm(current->mm); diff -puN mm/mmap.c~vma-corruption-fix mm/mmap.c --- 25/mm/mmap.c~vma-corruption-fix 2004-03-06 18:37:40.000000000 -0800 +++ 25-akpm/mm/mmap.c 2004-03-06 18:37:40.000000000 -0800 @@ -222,8 +222,8 @@ __vma_link_list(struct mm_struct *mm, st } } -static void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, - struct rb_node **rb_link, struct rb_node *rb_parent) +void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, + struct rb_node **rb_link, struct rb_node *rb_parent) { rb_link_node(&vma->vm_rb, rb_parent, rb_link); rb_insert_color(&vma->vm_rb, &mm->mm_rb); @@ -1404,22 +1404,6 @@ out: EXPORT_SYMBOL(do_brk); -/* Build the RB tree corresponding to the VMA list. */ -void build_mmap_rb(struct mm_struct * mm) -{ - struct vm_area_struct * vma; - struct rb_node ** rb_link, * rb_parent; - - mm->mm_rb = RB_ROOT; - rb_link = &mm->mm_rb.rb_node; - rb_parent = NULL; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - __vma_link_rb(mm, vma, rb_link, rb_parent); - rb_parent = &vma->vm_rb; - rb_link = &rb_parent->rb_right; - } -} - /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { _