From: Hugh Dickins 25-akpm/mm/rmap.c | 147 ++++++++++++++++++++++++++++++++---------------------- 1 files changed, 89 insertions(+), 58 deletions(-) diff -puN mm/rmap.c~page_convert_anon-locking-fix mm/rmap.c --- 25/mm/rmap.c~page_convert_anon-locking-fix Fri Mar 28 14:25:55 2003 +++ 25-akpm/mm/rmap.c Fri Mar 28 14:25:55 2003 @@ -784,106 +784,137 @@ int page_convert_anon(struct page *page) struct vm_area_struct *vma; struct pte_chain *pte_chain = NULL, *ptec; pte_t *pte; - pte_addr_t pte_paddr; + pte_addr_t pte_paddr = 0; int mapcount; - int index = 0; + int ptecount; + int index = 1; int err = 0; + down(&mapping->i_shared_sem); + pte_chain_lock(page); + + /* + * Has someone else done it for us before we got the lock? + * If so, pte.direct or pte.chain has replaced pte.mapcount. + */ if (PageAnon(page)) - goto out; + goto out_unlock; -retry: /* * Preallocate the pte_chains outside the lock. + * If mapcount grows while we're allocating here, retry. + * If mapcount shrinks, we free the excess before returning. */ mapcount = page->pte.mapcount; - if (mapcount > 1) { + while (index < mapcount) { + pte_chain_unlock(page); + up(&mapping->i_shared_sem); for (; index < mapcount; index += NRPTE) { + if (index == 1) + index = 0; ptec = pte_chain_alloc(GFP_KERNEL); if (!ptec) { - while (pte_chain) { - ptec = pte_chain->next; - pte_chain_free(pte_chain); - pte_chain = ptec; - } err = -ENOMEM; - goto out; + goto out_free; } ptec->next = pte_chain; pte_chain = ptec; } + down(&mapping->i_shared_sem); + pte_chain_lock(page); + /* + * Has someone else done it while we were allocating? + */ + if (PageAnon(page)) + goto out_unlock; + mapcount = page->pte.mapcount; } - down(&mapping->i_shared_sem); - pte_chain_lock(page); + if (!mapcount) + goto set_anon; +again: /* - * Check to make sure the number of mappings didn't change. If they - * did, either retry or free enough pte_chains to compensate. + * We don't try for page_table_lock (what would we do when a + * trylock fails?), therefore there's a small chance that we + * catch a vma just as it's being unmapped and its page tables + * freed. Our pte_chain_lock prevents that on vmas that really + * contain our page, but not on the others we look at. So we + * might locate junk that looks just like our page's pfn. It's + * a transient and very unlikely occurrence (much less likely + * than a trylock failing), so check how many maps we find, + * and if too many, start all over again. */ - if (mapcount < page->pte.mapcount) { - pte_chain_unlock(page); - up(&mapping->i_shared_sem); - goto retry; - } else if ((mapcount > page->pte.mapcount) && (mapcount > 1)) { - mapcount = page->pte.mapcount; - while ((index - NRPTE) > mapcount) { - index -= NRPTE; - ptec = pte_chain->next; - pte_chain_free(pte_chain); - pte_chain = ptec; - } - if (mapcount <= 1) - pte_chain_free(pte_chain); - } - SetPageAnon(page); + ptecount = 0; + ptec = pte_chain; - if (mapcount == 0) - goto out_unlock; - else if (mapcount == 1) { - SetPageDirect(page); - page->pte.direct = 0; - } else - page->pte.chain = pte_chain; + /* + * Arrange for the first pte_chain to be partially filled at + * the top, and the last (and any intermediates) to be full. + */ + index = mapcount % NRPTE; + if (index) + index = NRPTE - index; - index = NRPTE-1; list_for_each_entry(vma, &mapping->i_mmap, shared) { - /* FIXME: unsafe without page_table_lock */ pte = find_pte(vma, page, NULL); if (pte) { + ptecount++; + if (unlikely(ptecount > mapcount)) + goto again; pte_paddr = ptep_to_paddr(pte); pte_unmap(pte); - if (PageDirect(page)) { - page->pte.direct = pte_paddr; - goto out_unlock; - } - pte_chain->ptes[index] = pte_paddr; - if (!--index) { - pte_chain = pte_chain->next; - index = NRPTE-1; + if (ptec) { + ptec->ptes[index] = pte_paddr; + index++; + if (index == NRPTE) { + ptec = ptec->next; + index = 0; + } } } } list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { - /* FIXME: unsafe without page_table_lock */ pte = find_pte(vma, page, NULL); if (pte) { + ptecount++; + if (unlikely(ptecount > mapcount)) + goto again; pte_paddr = ptep_to_paddr(pte); pte_unmap(pte); - if (PageDirect(page)) { - page->pte.direct = pte_paddr; - goto out_unlock; - } - pte_chain->ptes[index] = pte_paddr; - if (!--index) { - pte_chain = pte_chain->next; - index = NRPTE-1; + if (ptec) { + ptec->ptes[index] = pte_paddr; + index++; + if (index == NRPTE) { + ptec = ptec->next; + index = 0; + } } } } + + BUG_ON(ptecount != mapcount); + if (mapcount == 1) { + SetPageDirect(page); + page->pte.direct = pte_paddr; + /* If pte_chain is set, it's all excess to be freed */ + } else { + page->pte.chain = pte_chain; + /* Point pte_chain to any excess to be freed */ + pte_chain = ptec; + BUG_ON(index); + } + +set_anon: + SetPageAnon(page); out_unlock: pte_chain_unlock(page); up(&mapping->i_shared_sem); -out: +out_free: + while (pte_chain) { + ptec = pte_chain->next; + pte_chain_free(pte_chain); + pte_chain = ptec; + } return err; } _