From: Hugh Dickins

A recent update removed the page_table_lock from objrmap.  That was simply
wrong: it's necessary to guard mm->rss (in try_to_unmap), and it's necessary
to ensure the page tables don't get freed beneath us (in which case any
modification of the pte, by page_referenced or by try_to_unmap, might be
corrupting a freed and reused page).

Then restore the original SWAP_AGAIN semantics in try_to_unmap: simplest for
inner levels to return SWAP_AGAIN or SWAP_FAIL, outer level to decide
SWAP_SUCCESS once the page is fully unmapped.  Stop searching once all
mappings have been removed.

page_convert_anon left for the moment with FIXMEs rather than
page_table_lock.  I believe it can be done without page_table_lock (which
would be problematic there), but have noticed several other bugs there;
more patches will follow at a later date (though I'd much rather anobjrmap,
which handles all this so much more cleanly).


 25-akpm/mm/rmap.c |   28 +++++++++++++++++++---------
 1 files changed, 19 insertions(+), 9 deletions(-)

diff -puN mm/rmap.c~hugh-07-objrmap-page_table_lock mm/rmap.c
--- 25/mm/rmap.c~hugh-07-objrmap-page_table_lock	Tue Mar 25 18:34:52 2003
+++ 25-akpm/mm/rmap.c	Tue Mar 25 18:34:52 2003
@@ -144,9 +144,13 @@ out:
 static int
 page_referenced_obj_one(struct vm_area_struct *vma, struct page *page)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
 	int referenced = 0;
 
+	if (!spin_trylock(&mm->page_table_lock))
+		return 1;
+
 	pte = find_pte(vma, page, NULL);
 	if (pte) {
 		if (ptep_test_and_clear_young(pte))
@@ -154,6 +158,7 @@ page_referenced_obj_one(struct vm_area_s
 		pte_unmap(pte);
 	}
 
+	spin_unlock(&mm->page_table_lock);
 	return referenced;
 }
 
@@ -490,7 +495,10 @@ try_to_unmap_obj_one(struct vm_area_stru
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
-	int ret = SWAP_SUCCESS;
+	int ret = SWAP_AGAIN;
+
+	if (!spin_trylock(&mm->page_table_lock))
+		return ret;
 
 	pte = find_pte(vma, page, &address);
 	if (!pte)
@@ -519,6 +527,7 @@ out_unmap:
 	pte_unmap(pte);
 
 out:
+	spin_unlock(&mm->page_table_lock);
 	return ret;
 }
 
@@ -539,7 +548,7 @@ try_to_unmap_obj(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct vm_area_struct *vma;
-	int ret = SWAP_SUCCESS;
+	int ret = SWAP_AGAIN;
 
 	if (!mapping)
 		BUG();
@@ -548,23 +557,20 @@ try_to_unmap_obj(struct page *page)
 		BUG();
 
 	if (down_trylock(&mapping->i_shared_sem))
-		return SWAP_AGAIN;
+		return ret;
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		ret = try_to_unmap_obj_one(vma, page);
-		if (ret != SWAP_SUCCESS)
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
 			goto out;
 	}
 
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		ret = try_to_unmap_obj_one(vma, page);
-		if (ret != SWAP_SUCCESS)
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
 			goto out;
 	}
 
-	if (page->pte.mapcount)
-		BUG();
-
 out:
 	up(&mapping->i_shared_sem);
 	return ret;
@@ -754,8 +760,10 @@ int try_to_unmap(struct page * page)
 		}
 	}
 out:
-	if (!page_mapped(page))
+	if (!page_mapped(page)) {
 		dec_page_state(nr_mapped);
+		ret = SWAP_SUCCESS;
+	}
 	return ret;
 }
 
@@ -840,6 +848,7 @@ retry:
 	index = NRPTE-1;
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
 			pte_paddr = ptep_to_paddr(pte);
@@ -856,6 +865,7 @@ retry:
 		}
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
 			pte_paddr = ptep_to_paddr(pte);
_
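
To make the restored return-code convention concrete, here is a minimal
userspace sketch of the aggregation described above: inner levels report only
SWAP_AGAIN or SWAP_FAIL, and the outer level alone promotes the result to
SWAP_SUCCESS, once nothing maps the page any more.  The fake_page type, the
unmap_one() stub and its got_lock flag, and the enum values are illustrative
stand-ins, not the kernel's definitions.

/*
 * Minimal userspace sketch of the restored try_to_unmap() return
 * semantics: inner levels say only "keep trying" (SWAP_AGAIN) or
 * "hard failure" (SWAP_FAIL); the outer level alone declares
 * SWAP_SUCCESS, and only when nothing maps the page any more.
 * All names and values here are illustrative, not the kernel's.
 */
#include <stdio.h>

enum swap_ret { SWAP_SUCCESS, SWAP_AGAIN, SWAP_FAIL };

struct fake_page { int mapcount; };

/* Stand-in for try_to_unmap_obj_one(): drops one mapping, or reports
 * SWAP_AGAIN when it could not take the lock this time around. */
static enum swap_ret unmap_one(struct fake_page *page, int got_lock)
{
	if (!got_lock)
		return SWAP_AGAIN;	/* trylock failed: try again later */
	if (page->mapcount > 0)
		page->mapcount--;
	return SWAP_AGAIN;		/* inner level never claims success */
}

/* Stand-in for the outer try_to_unmap() loop over the vmas. */
static enum swap_ret unmap_all(struct fake_page *page, int nvmas)
{
	enum swap_ret ret = SWAP_AGAIN;
	int i;

	for (i = 0; i < nvmas; i++) {
		ret = unmap_one(page, /* got_lock */ 1);
		/* Stop on hard failure, or once all mappings are gone. */
		if (ret == SWAP_FAIL || !page->mapcount)
			break;
	}
	if (!page->mapcount)
		ret = SWAP_SUCCESS;	/* outer level decides success */
	return ret;
}

int main(void)
{
	struct fake_page page = { .mapcount = 3 };

	printf("result=%d mapcount=%d\n", unmap_all(&page, 5), page.mapcount);
	return 0;
}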