From: William Lee Irwin III

The new vmalloc() semantics from 2.5.32 had a race window.  As things
stand, the presence of a vm_area in the vmlist is what keeps allocators
other than the owner from examining the ptes in that area.  This puts an
ordering constraint on unmapping: allocators are required to unmap areas
before removing them from the list or otherwise dropping the lock.
Currently, unmap_vm_area() is done outside the lock and after the area is
removed, which, as we've seen from Felix von Leitner's test, is oopsable.

The following patch folds the calls to unmap_vm_area() into
remove_vm_area(), reinstating what are essentially the 2.4.x semantics of
vfree().  This renders a number of unmap_vm_area() calls elsewhere
unnecessary (and in fact oopsable, since they would wipe ptes belonging
to later allocations).  It's an open question whether this is
sufficiently performant, but it is the minimally invasive approach.

The more performant alternative is to provide the right API hooks to wipe
a vmalloc() area clean before removing it from the list, using the
ownership of the area to avoid holding the vmlist_lock for the duration
of the unmapping.  If that proves necessary, wli is on standby to
implement it.

 arch/i386/mm/ioremap.c       |    1 -
 arch/sparc64/kernel/module.c |    2 ++
 arch/x86_64/kernel/module.c  |    2 +-
 arch/x86_64/mm/ioremap.c     |    1 -
 mm/vmalloc.c                 |    3 +--
 5 files changed, 4 insertions(+), 5 deletions(-)

diff -puN arch/i386/mm/ioremap.c~vmalloc-race-fix arch/i386/mm/ioremap.c
--- 25/arch/i386/mm/ioremap.c~vmalloc-race-fix	2003-05-12 21:23:25.000000000 -0700
+++ 25-akpm/arch/i386/mm/ioremap.c	2003-05-12 21:23:25.000000000 -0700
@@ -222,7 +222,6 @@ void iounmap(void *addr)
 		return;
 	}
 
-	unmap_vm_area(p);
 	if (p->flags && p->phys_addr < virt_to_phys(high_memory)) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
diff -puN arch/sparc64/kernel/module.c~vmalloc-race-fix arch/sparc64/kernel/module.c
--- 25/arch/sparc64/kernel/module.c~vmalloc-race-fix	2003-05-12 21:23:25.000000000 -0700
+++ 25-akpm/arch/sparc64/kernel/module.c	2003-05-12 21:23:25.000000000 -0700
@@ -138,7 +138,9 @@ void *module_alloc(unsigned long size)
 /* Free memory returned from module_core_alloc/module_init_alloc */
 void module_free(struct module *mod, void *module_region)
 {
+	write_lock(&vmlist_lock);
 	module_unmap(module_region);
+	write_unlock(&vmlist_lock);
 	/* FIXME: If module_region == mod->init_region, trim exception
           table entries. */
 }
diff -puN arch/x86_64/kernel/module.c~vmalloc-race-fix arch/x86_64/kernel/module.c
--- 25/arch/x86_64/kernel/module.c~vmalloc-race-fix	2003-05-12 21:23:25.000000000 -0700
+++ 25-akpm/arch/x86_64/kernel/module.c	2003-05-12 21:23:25.000000000 -0700
@@ -48,7 +48,6 @@ void module_free(struct module *mod, voi
 	for (prevp = &mod_vmlist ; (map = *prevp) ; prevp = &map->next) {
 		if ((unsigned long)map->addr == addr) {
 			*prevp = map->next;
-			write_unlock(&vmlist_lock);
 			goto found;
 		}
 	}
@@ -57,6 +56,7 @@ void module_free(struct module *mod, voi
 	return;
 found:
 	unmap_vm_area(map);
+	write_unlock(&vmlist_lock);
 	if (map->pages) {
 		for (i = 0; i < map->nr_pages; i++)
 			if (map->pages[i])
diff -puN arch/x86_64/mm/ioremap.c~vmalloc-race-fix arch/x86_64/mm/ioremap.c
--- 25/arch/x86_64/mm/ioremap.c~vmalloc-race-fix	2003-05-12 21:23:25.000000000 -0700
+++ 25-akpm/arch/x86_64/mm/ioremap.c	2003-05-12 21:23:25.000000000 -0700
@@ -222,7 +222,6 @@ void iounmap(void *addr)
 		return;
 	}
 
-	unmap_vm_area(p);
 	if (p->flags && p->phys_addr < virt_to_phys(high_memory)) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
diff -puN mm/vmalloc.c~vmalloc-race-fix mm/vmalloc.c
--- 25/mm/vmalloc.c~vmalloc-race-fix	2003-05-12 21:23:25.000000000 -0700
+++ 25-akpm/mm/vmalloc.c	2003-05-12 21:23:25.000000000 -0700
@@ -260,6 +260,7 @@ struct vm_struct *remove_vm_area(void *a
 	return NULL;
 
 found:
+	unmap_vm_area(tmp);
 	*p = tmp->next;
 	write_unlock(&vmlist_lock);
 	return tmp;
@@ -283,8 +284,6 @@ void __vunmap(void *addr, int deallocate
 			addr);
 		return;
 	}
-
-	unmap_vm_area(area);
 	if (deallocate_pages) {
 		int i;
 
_
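
For anyone skimming the diff, the key change is in remove_vm_area(): the
ptes are now wiped while the area is still on the vmlist and vmlist_lock
is still held, and only then is the area unlinked.  Below is a simplified
sketch of the resulting ordering; the lines not shown in the hunks above
are reconstructed from context, so treat this as illustrative rather than
the literal kernel source:

struct vm_struct *remove_vm_area(void *addr)
{
	struct vm_struct **p, *tmp;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr == addr)
			goto found;
	}
	write_unlock(&vmlist_lock);
	return NULL;

found:
	/*
	 * Wipe the ptes while the area is still on the vmlist and the
	 * lock is held; no other allocator can be handed this range
	 * until the area is unlinked, so nothing races with the unmap.
	 */
	unmap_vm_area(tmp);
	*p = tmp->next;			/* unlink only after the unmap */
	write_unlock(&vmlist_lock);
	return tmp;
}

Callers such as iounmap() and __vunmap(), which previously called
unmap_vm_area() themselves after remove_vm_area() returned, must drop
those calls; that is what the remaining hunks do.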