From Dave. Crappy name. fs/exec.c | 1 include/asm-i386/mman.h | 0 include/asm-ppc64/mman.h | 0 include/linux/mm.h | 1 include/linux/page-flags.h | 5 include/linux/rmap-locking.h | 2 mm/fremap.c | 11 + mm/memory.c | 8 mm/mmap.c | 0 mm/page_alloc.c | 2 mm/rmap.c | 380 +++++++++++++++++++++++++++++++++++++++++++ mm/swapfile.c | 1 12 files changed, 411 insertions(+) diff -puN fs/exec.c~objrmap-2.5.62-5 fs/exec.c --- 25/fs/exec.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/fs/exec.c 2003-03-29 16:38:04.000000000 -0800 @@ -315,6 +315,7 @@ void put_dirty_page(struct task_struct * lru_cache_add_active(page); flush_dcache_page(page); flush_page_to_ram(page); + SetPageAnon(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); diff -puN include/asm-i386/mman.h~objrmap-2.5.62-5 include/asm-i386/mman.h diff -puN include/linux/mm.h~objrmap-2.5.62-5 include/linux/mm.h --- 25/include/linux/mm.h~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/include/linux/mm.h 2003-03-29 16:38:04.000000000 -0800 @@ -170,6 +170,7 @@ struct page { struct pte_chain *chain;/* Reverse pte mapping pointer. * protected by PG_chainlock */ pte_addr_t direct; + int mapcount; } pte; unsigned long private; /* mapping-private opaque data */ diff -puN include/linux/page-flags.h~objrmap-2.5.62-5 include/linux/page-flags.h --- 25/include/linux/page-flags.h~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/include/linux/page-flags.h 2003-03-29 16:38:04.000000000 -0800 @@ -74,6 +74,7 @@ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ #define PG_compound 19 /* Part of a compound page */ +#define PG_anon 20 /* Anonymous page */ /* * Global page accounting. One instance per CPU. 
Only unsigned longs are @@ -255,6 +256,10 @@ extern void get_full_page_state(struct p #define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) #define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) +#define PageAnon(page) test_bit(PG_anon, &(page)->flags) +#define SetPageAnon(page) set_bit(PG_anon, &(page)->flags) +#define ClearPageAnon(page) clear_bit(PG_anon, &(page)->flags) + /* * The PageSwapCache predicate doesn't use a PG_flag at this time, * but it may again do so one day. diff -puN mm/fremap.c~objrmap-2.5.62-5 mm/fremap.c --- 25/mm/fremap.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/mm/fremap.c 2003-03-29 16:38:04.000000000 -0800 @@ -60,10 +60,21 @@ int install_page(struct mm_struct *mm, s pgd_t *pgd; pmd_t *pmd; struct pte_chain *pte_chain; + unsigned long pgidx; pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) goto err; + + /* + * Convert this page to anon for objrmap if it's nonlinear + */ + pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; + pgidx += vma->vm_pgoff; + pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; + if (!PageAnon(page) && (page->index != pgidx)) + page_convert_anon(page); + pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); diff -puN mm/memory.c~objrmap-2.5.62-5 mm/memory.c --- 25/mm/memory.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/mm/memory.c 2003-03-29 16:38:04.000000000 -0800 @@ -1012,6 +1012,7 @@ static int do_wp_page(struct mm_struct * ++mm->rss; page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); + SetPageAnon(new_page); pte_chain = page_add_rmap(new_page, page_table, pte_chain); lru_cache_add_active(new_page); @@ -1221,6 +1222,7 @@ static int do_swap_page(struct mm_struct flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(page_table, pte); + SetPageAnon(page); pte_chain = page_add_rmap(page, page_table, pte_chain); /* No need to invalidate - it was non-present before */ @@ -1287,6 +1289,7 @@ 
do_anonymous_page(struct mm_struct *mm, entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); + SetPageAnon(page); } set_pte(page_table, entry); @@ -1346,6 +1349,10 @@ do_no_page(struct mm_struct *mm, struct if (!pte_chain) goto oom; + /* See if nopage returned an anon page */ + if (!new_page->mapping || PageSwapCache(new_page)) + SetPageAnon(new_page); + /* * Should we do an early C-O-W break? */ @@ -1358,6 +1365,7 @@ do_no_page(struct mm_struct *mm, struct copy_user_highpage(page, new_page, address); page_cache_release(new_page); lru_cache_add_active(page); + SetPageAnon(page); new_page = page; } diff -puN mm/mmap.c~objrmap-2.5.62-5 mm/mmap.c diff -puN mm/page_alloc.c~objrmap-2.5.62-5 mm/page_alloc.c --- 25/mm/page_alloc.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/mm/page_alloc.c 2003-03-29 16:38:04.000000000 -0800 @@ -220,6 +220,8 @@ static inline void free_pages_check(cons bad_page(function, page); if (PageDirty(page)) ClearPageDirty(page); + if (PageAnon(page)) + ClearPageAnon(page); } /* diff -puN mm/rmap.c~objrmap-2.5.62-5 mm/rmap.c --- 25/mm/rmap.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/mm/rmap.c 2003-03-29 16:38:04.000000000 -0800 @@ -77,6 +77,135 @@ kmem_cache_t *pte_chain_cache; **/ /** + * find_pte - Find a pte pointer given a vma and a struct page. + * @vma: the vma to search + * @page: the page to find + * + * Determine if this page is mapped in this vma. If it is, map and return + * the pte pointer associated with it. Return null if the page is not + * mapped in this vma for any reason. + * + * This is strictly an internal helper function for the object-based rmap + * functions. + * + * It is the caller's responsibility to unmap the pte if it is returned. 
+ */ +static inline pte_t * +find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr) +{ + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long loffset; + unsigned long address; + + loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)); + if (loffset < vma->vm_pgoff) + goto out; + + address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT); + + if (address >= vma->vm_end) + goto out; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) + goto out; + + pte = pte_offset_map(pmd, address); + if (!pte_present(*pte)) + goto out_unmap; + + if (page_to_pfn(page) != pte_pfn(*pte)) + goto out_unmap; + + if (addr) + *addr = address; + + return pte; + +out_unmap: + pte_unmap(pte); +out: + return NULL; +} + +/** + * page_referenced_obj_one - referenced check for object-based rmap + * @vma: the vma to look in. + * @page: the page we're working on. + * + * Find a pte entry for a page/vma pair, then check and clear the referenced + * bit. + * + * This is strictly a helper function for page_referenced_obj. + */ +static int +page_referenced_obj_one(struct vm_area_struct *vma, struct page *page) +{ + pte_t *pte; + int referenced = 0; + + pte = find_pte(vma, page, NULL); + if (pte) { + if (ptep_test_and_clear_young(pte)) + referenced++; + pte_unmap(pte); + } + + return referenced; +} + +/** + * page_referenced_obj - referenced check for object-based rmap + * @page: the page we're checking references on. + * + * For an object-based mapped page, find all the places it is mapped and + * check/clear the referenced flag. This is done by following the page->mapping + * pointer, then walking the chain of vmas it holds. It returns the number + * of references it found. + * + * This function is only called from page_referenced for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. 
If it can't be gotten, + * assume a reference count of 1. + */ +static int +page_referenced_obj(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct vm_area_struct *vma; + int referenced = 0; + + if (!page->pte.mapcount) + return 0; + + if (!mapping) + BUG(); + + if (PageSwapCache(page)) + BUG(); + + if (down_trylock(&mapping->i_shared_sem)) + return 1; + + list_for_each_entry(vma, &mapping->i_mmap, shared) + referenced += page_referenced_obj_one(vma, page); + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) + referenced += page_referenced_obj_one(vma, page); + + up(&mapping->i_shared_sem); + + return referenced; +} + +/** * page_referenced - test if the page was referenced * @page: the page to test * @@ -95,6 +224,10 @@ int page_referenced(struct page * page) if (TestClearPageReferenced(page)) referenced++; + if (!PageAnon(page)) { + referenced += page_referenced_obj(page); + goto out; + } if (PageDirect(page)) { pte_t *pte = rmap_ptep_map(page->pte.direct); if (ptep_test_and_clear_young(pte)) @@ -128,6 +261,7 @@ int page_referenced(struct page * page) __pte_chain_free(pc); } } +out: return referenced; } @@ -160,6 +294,22 @@ page_add_rmap(struct page *page, pte_t * pte_chain_lock(page); + /* + * If this is an object-based page, just count it. We can + * find the mappings by walking the object vma chain for that object. + */ + if (!PageAnon(page)) { + if (!page->mapping) + BUG(); + if (PageSwapCache(page)) + BUG(); + if (!page->pte.mapcount) + inc_page_state(nr_mapped); + page->pte.mapcount++; + pte_chain_unlock(page); + return pte_chain; + } + #ifdef DEBUG_RMAP /* * This stuff needs help to get up to highmem speed. @@ -248,6 +398,24 @@ void page_remove_rmap(struct page * page pte_chain_lock(page); + /* + * If this is an object-based page, just uncount it. We can + * find the mappings by walking the object vma chain for that object. 
+ */ + if (!PageAnon(page)) { + if (!page->mapping) + BUG(); + if (PageSwapCache(page)) + BUG(); + if (!page->pte.mapcount) + BUG(); + page->pte.mapcount--; + if (!page->pte.mapcount) + dec_page_state(nr_mapped); + pte_chain_unlock(page); + return; + } + if (PageDirect(page)) { if (page->pte.direct == pte_paddr) { page->pte.direct = 0; @@ -312,6 +480,101 @@ out: } /** + * try_to_unmap_obj_one - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Determine whether a page is mapped in a given vma and unmap it if it's found. + * + * This function is strictly a helper function for try_to_unmap_obj. + */ +static inline int +try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + pte_t pteval; + int ret = SWAP_SUCCESS; + + pte = find_pte(vma, page, &address); + if (!pte) + goto out; + + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + goto out_unmap; + } + + flush_cache_page(vma, address); + pteval = ptep_get_and_clear(pte); + flush_tlb_page(vma, address); + + if (pte_dirty(pteval)) + set_page_dirty(page); + + if (!page->pte.mapcount) + BUG(); + + mm->rss--; + page->pte.mapcount--; + page_cache_release(page); + +out_unmap: + pte_unmap(pte); + +out: + return ret; +} + +/** + * try_to_unmap_obj - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Find all the mappings of a page using the mapping pointer and the vma chains + * contained in the address_space struct it points to. + * + * This function is only called from try_to_unmap for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * return a temporary error. 
+ */ +static int +try_to_unmap_obj(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct vm_area_struct *vma; + int ret = SWAP_SUCCESS; + + if (!mapping) + BUG(); + + if (PageSwapCache(page)) + BUG(); + + if (down_trylock(&mapping->i_shared_sem)) + return SWAP_AGAIN; + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + ret = try_to_unmap_obj_one(vma, page); + if (ret != SWAP_SUCCESS) + goto out; + } + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + ret = try_to_unmap_obj_one(vma, page); + if (ret != SWAP_SUCCESS) + goto out; + } + + if (page->pte.mapcount) + BUG(); + +out: + up(&mapping->i_shared_sem); + return ret; +} + +/** * try_to_unmap_one - worker function for try_to_unmap * @page: page to unmap * @ptep: page table entry to unmap from page @@ -431,6 +694,15 @@ int try_to_unmap(struct page * page) if (!page->mapping) BUG(); + /* + * If it's an object-based page, use the object vma chain to find all + * the mappings. + */ + if (!PageAnon(page)) { + ret = try_to_unmap_obj(page); + goto out; + } + if (PageDirect(page)) { ret = try_to_unmap_one(page, page->pte.direct); if (ret == SWAP_SUCCESS) { @@ -492,6 +764,114 @@ out: } /** + * page_convert_anon - Convert an object-based mapped page to pte_chain-based. + * @page: the page to convert + * + * Find all the mappings for an object-based page and convert them + * to 'anonymous', ie create a pte_chain and store all the pte pointers there. + * + * This function takes the address_space->i_shared_sem and the pte_chain_lock + * for the page. It jumps through some hoops to preallocate the correct number + * of pte_chain structures to ensure that it can complete without releasing + * the lock. 
+ */ +void page_convert_anon(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct vm_area_struct *vma; + struct pte_chain *pte_chain = NULL, *ptec; + pte_t *pte; + pte_addr_t pte_paddr; + int mapcount; + int index = 0; + + if (PageAnon(page)) + goto out; + +retry: + /* + * Preallocate the pte_chains outside the lock. + */ + mapcount = page->pte.mapcount; + if (mapcount > 1) { + for (; index < mapcount; index += NRPTE) { + ptec = pte_chain_alloc(GFP_KERNEL); + ptec->next = pte_chain; + pte_chain = ptec; + } + } + down(&mapping->i_shared_sem); + pte_chain_lock(page); + + /* + * Check to make sure the number of mappings didn't change. If they + * did, either retry or free enough pte_chains to compensate. + */ + if (mapcount < page->pte.mapcount) { + pte_chain_unlock(page); + goto retry; + } else if ((mapcount > page->pte.mapcount) && (mapcount > 1)) { + mapcount = page->pte.mapcount; + while ((index - NRPTE) > mapcount) { + index -= NRPTE; + ptec = pte_chain->next; + pte_chain_free(pte_chain); + pte_chain = ptec; + } + if (mapcount <= 1) + pte_chain_free(pte_chain); + } + SetPageAnon(page); + + if (mapcount == 0) + goto out; + else if (mapcount == 1) { + SetPageDirect(page); + page->pte.direct = 0; + } else + page->pte.chain = pte_chain; + + index = NRPTE-1; + list_for_each_entry(vma, &mapping->i_mmap, shared) { + pte = find_pte(vma, page, NULL); + if (pte) { + pte_paddr = ptep_to_paddr(pte); + pte_unmap(pte); + if (PageDirect(page)) { + page->pte.direct = pte_paddr; + goto out_unlock; + } + pte_chain->ptes[index] = pte_paddr; + if (!--index) { + pte_chain = pte_chain->next; + index = NRPTE-1; + } + } + } + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + pte = find_pte(vma, page, NULL); + if (pte) { + pte_paddr = ptep_to_paddr(pte); + pte_unmap(pte); + if (PageDirect(page)) { + page->pte.direct = pte_paddr; + goto out_unlock; + } + pte_chain->ptes[index] = pte_paddr; + if (!--index) { + pte_chain = pte_chain->next; + index = 
NRPTE-1; + } + } + } +out_unlock: + pte_chain_unlock(page); + up(&mapping->i_shared_sem); +out: + return; +} + +/** ** No more VM stuff below this comment, only pte_chain helper ** functions. **/ diff -puN mm/swapfile.c~objrmap-2.5.62-5 mm/swapfile.c --- 25/mm/swapfile.c~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/mm/swapfile.c 2003-03-29 16:38:04.000000000 -0800 @@ -392,6 +392,7 @@ unuse_pte(struct vm_area_struct *vma, un return; get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); + SetPageAnon(page); *pte_chainp = page_add_rmap(page, dir, *pte_chainp); swap_free(entry); ++vma->vm_mm->rss; diff -puN include/asm-ppc64/mman.h~objrmap-2.5.62-5 include/asm-ppc64/mman.h diff -puN include/linux/rmap-locking.h~objrmap-2.5.62-5 include/linux/rmap-locking.h --- 25/include/linux/rmap-locking.h~objrmap-2.5.62-5 2003-03-29 16:38:04.000000000 -0800 +++ 25-akpm/include/linux/rmap-locking.h 2003-03-29 16:38:04.000000000 -0800 @@ -45,3 +45,5 @@ static inline void pte_chain_free(struct if (pte_chain) __pte_chain_free(pte_chain); } + +void page_convert_anon(struct page *page); _