diff -urN linux/include/linux/mm.h linux-wli/include/linux/mm.h
--- linux/include/linux/mm.h	Fri Mar 15 20:15:39 2002
+++ linux-wli/include/linux/mm.h	Tue Mar 19 03:29:13 2002
@@ -300,6 +300,18 @@
 #define PG_reserved		14
 #define PG_launder		15	/* written out by VM pressure.. */
 #define PG_nosave		29
+#define PG_chainlock		16	/* lock bit for pte_chain */
+
+static inline void pte_chain_lock(struct page *page)
+{
+	while (test_and_set_bit(PG_chainlock, &(page)->flags))
+		cpu_relax();
+}
+
+static inline void pte_chain_unlock(struct page *page)
+{
+	clear_bit(PG_chainlock, &(page)->flags);
+}
 
 /* Make it prettier to test the above... */
 #define UnlockPage(page)	unlock_page(page)
diff -urN linux/mm/filemap.c linux-wli/mm/filemap.c
--- linux/mm/filemap.c	Fri Mar 15 20:15:39 2002
+++ linux-wli/mm/filemap.c	Tue Mar 19 03:00:29 2002
@@ -191,8 +191,10 @@
 static void truncate_complete_page(struct page *page)
 {
 	/* Leave it on the LRU if it gets converted into anonymous buffers */
+	pte_chain_lock(page);
 	if (!page->pte_chain && (!page->buffers || do_flushpage(page, 0)))
 		lru_cache_del(page);
+	pte_chain_unlock(page);
 
 	/*
 	 * We remove the page from the page cache _after_ we have
diff -urN linux/mm/page_alloc.c linux-wli/mm/page_alloc.c
--- linux/mm/page_alloc.c	Tue Mar 19 01:59:25 2002
+++ linux-wli/mm/page_alloc.c	Tue Mar 19 03:01:41 2002
@@ -128,8 +128,10 @@
 		BUG();
 	if (PageInactiveClean(page))
 		BUG();
+	pte_chain_lock(page);
 	if (page->pte_chain)
 		BUG();
+	pte_chain_unlock(page);
 
 	page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
 	page->age = PAGE_AGE_START;
diff -urN linux/mm/rmap.c linux-wli/mm/rmap.c
--- linux/mm/rmap.c	Tue Mar 19 01:59:25 2002
+++ linux-wli/mm/rmap.c	Tue Mar 19 03:27:19 2002
@@ -13,9 +13,8 @@
 
 /*
  * Locking:
- * - the page->pte_chain is protected by the per zone lru lock,
- *   we probably want to change this to a per-page lock in the
- *   future
+ * - the page->pte_chain is protected by the PG_chainlock bit
+ *   in page->flags
  * - because swapout locking is opposite to the locking order
  *   in the page fault path, the swapout path uses trylocks
  *   on the mm->page_table_lock
@@ -77,7 +76,7 @@
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of processes which referenced the page.
- * Caller needs to hold the lru locks.
+ * Caller needs to hold the pte_chain lock.
  */
 int FASTCALL(page_referenced(struct page *));
 int page_referenced(struct page * page)
@@ -116,7 +115,6 @@
 
 	zone = page_zone(page);
 
-	lock_lru(zone);
 #ifdef DEBUG_RMAP
 	if (!page || !ptep)
 		BUG();
@@ -126,20 +124,22 @@
 		BUG();
 	{
 		struct pte_chain * pc;
+		pte_chain_lock(page);
 		for (pc = page->pte_chain; pc; pc = pc->next) {
 			if (pc->ptep == ptep)
 				BUG();
 		}
+		pte_chain_unlock(page);
 	}
 #endif
 
 	pte_chain = pte_chain_alloc(zone);
+	pte_chain->ptep = ptep;
 
 	/* Hook up the pte_chain to the page. */
-	pte_chain->ptep = ptep;
+	pte_chain_lock(page);
 	pte_chain->next = page->pte_chain;
 	page->pte_chain = pte_chain;
-
-	unlock_lru(zone);
+	pte_chain_unlock(page);
 }
 
 /**
@@ -164,7 +164,7 @@
 
 	zone = page_zone(page);
 
-	lock_lru(zone);
+	pte_chain_lock(page);
 	for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) {
 		if (pc->ptep == ptep) {
 			pte_chain_free(zone, pc, prev_pc, page);
@@ -182,7 +182,7 @@
 #endif
 
 out:
-	unlock_lru(zone);
+	pte_chain_unlock(page);
 	return;
 }
 
@@ -262,8 +262,8 @@
  * @page: the page to get unmapped
  *
  * Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path.  Caller must hold the lru locks
- * and the page lock.  Return values are:
+ * page, used in the pageout path.  Caller must hold the page's
+ * pte_chain lock and the page lock.  Return values are:
  *
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a trylock, try again later
@@ -313,7 +313,7 @@
  * we make the optimisation of only checking the first process
  * in the pte_chain list, this should catch hogs while not
  * evicting pages shared by many processes.
- * The caller needs to hold the lru locks.
+ * The caller needs to hold the page's pte_chain lock.
  */
 int FASTCALL(page_over_rsslimit(struct page *));
 int page_over_rsslimit(struct page * page)
@@ -341,7 +341,7 @@
 * This function unlinks pte_chain from the singly linked list it
 * may be on and adds the pte_chain to the free list. May also be
 * called for new pte_chain structures which aren't on any list yet.
- * Caller needs to hold the lru lock.
+ * Caller needs to hold the page's pte_chain lock if page is non-NULL.
 */
 static inline void pte_chain_free(
	zone_t * zone,
	struct pte_chain * pte_chain,
@@ -363,18 +363,18 @@
 *
 * Returns a pointer to a fresh pte_chain structure. Allocates new
 * pte_chain structures as required.
- * Caller needs to hold the lru locks.
+ * Caller needs to hold the page's pte_chain lock.
 */
 static inline struct pte_chain * pte_chain_alloc(zone_t *zone)
 {
	struct pte_chain * pte_chain;
 
	/* Allocate new pte_chain structs as needed. */
+	spin_lock(&zone->pte_chain_freelist_lock);
	if (!zone->pte_chain_freelist)
		alloc_new_pte_chains(zone);
 
	/* Grab the first pte_chain from the freelist. */
-	spin_lock(&zone->pte_chain_freelist_lock);
	pte_chain = pte_chain_pop(zone);
	spin_unlock(&zone->pte_chain_freelist_lock);
diff -urN linux/mm/swap.c linux-wli/mm/swap.c
--- linux/mm/swap.c	Sat Mar 16 02:26:52 2002
+++ linux-wli/mm/swap.c	Tue Mar 19 03:11:31 2002
@@ -107,6 +107,7 @@
 	}
 
 	/* Make sure the page really is reclaimable. */
+	pte_chain_lock(page);
 	if (!page->mapping || PageDirty(page) || page->pte_chain ||
 			page->buffers || page_count(page) > 1)
 		deactivate_page_nolock(page);
@@ -122,6 +123,7 @@
 			add_page_to_inactive_clean_list(page);
 		}
 	}
+	pte_chain_unlock(page);
 }
 
 /**
diff -urN linux/mm/vmscan.c linux-wli/mm/vmscan.c
--- linux/mm/vmscan.c	Fri Mar 15 20:15:39 2002
+++ linux-wli/mm/vmscan.c	Tue Mar 19 03:26:23 2002
@@ -49,6 +49,7 @@
 		page->age -= min(PAGE_AGE_DECL, (int)page->age);
 }
 
+/* Must be called with the page's pte_chain lock held */
 static inline int page_mapping_inuse(struct page * page)
 {
 	struct address_space * mapping = page->mapping;
@@ -109,13 +110,16 @@
 		}
 
 		/* Page cannot be reclaimed ?  Move to inactive_dirty list. */
+		pte_chain_lock(page);
 		if (unlikely(page->pte_chain || page->buffers ||
				PageReferenced(page) || PageDirty(page) ||
				page_count(page) > 1 || TryLockPage(page))) {
 			del_page_from_inactive_clean_list(page);
 			add_page_to_inactive_dirty_list(page);
+			pte_chain_unlock(page);
 			continue;
 		}
+		pte_chain_unlock(page);
 
 		/* OK, remove the page from the caches. */
 		if (PageSwapCache(page)) {
@@ -242,13 +246,16 @@
 		 * The page is in active use or really unfreeable. Move to
 		 * the active list and adjust the page age if needed.
 		 */
+		pte_chain_lock(page);
 		if (page_referenced(page) && page_mapping_inuse(page) &&
 				!page_over_rsslimit(page)) {
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_active_list(page);
 			page->age = max((int)page->age, PAGE_AGE_START);
+			pte_chain_unlock(page);
 			continue;
 		}
+		pte_chain_unlock(page);
 
 		/*
 		 * Page is being freed, don't worry about it.
@@ -269,6 +276,7 @@
 		 *
 		 * XXX: implement swap clustering ?
 		 */
+		pte_chain_lock(page);
 		if (page->pte_chain && !page->mapping && !page->buffers) {
 			page_cache_get(page);
 			unlock_lru(zone);
@@ -278,15 +286,18 @@
 				page_cache_release(page);
 				lock_lru(zone);
+				pte_chain_unlock(page);
 				continue;
 			}
 			page_cache_release(page);
 			lock_lru(zone);
 		}
+		pte_chain_unlock(page);
 
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
+		pte_chain_lock(page);
 		if (page->pte_chain) {
 			switch (try_to_unmap(page)) {
 			case SWAP_ERROR:
@@ -299,6 +310,7 @@
 				;	/* try to free the page below */
 			}
 		}
+		pte_chain_unlock(page);
 
 		mapping = page->mapping;
 
 		if (PageDirty(page) && mapping) {
@@ -386,6 +398,7 @@
 		 * This test is not safe from races, but only the one
 		 * in reclaim_page() needs to be.
 		 */
+		pte_chain_lock(page);
 		if (mapping && !PageDirty(page) && !page->pte_chain &&
 				page_count(page) == 1) {
 			del_page_from_inactive_dirty_list(page);
@@ -403,6 +416,7 @@
 			add_page_to_active_list(page);
 			UnlockPage(page);
 		}
+		pte_chain_unlock(page);
 	}
 	unlock_lru(zone);
@@ -472,33 +486,42 @@
 		 * bother with page aging. If the page is touched again
 		 * while on the inactive_clean list it'll be reactivated.
 		 */
+		pte_chain_lock(page);
 		if (!page_mapping_inuse(page)) {
+			pte_chain_unlock(page);
 			drop_page_zone(zone, page);
 			continue;
 		}
+		pte_chain_unlock(page);
 
 		/*
 		 * Do aging on the pages.
 		 */
+		pte_chain_lock(page);
 		if (page_referenced(page)) {
 			age_page_up(page);
 		} else {
 			age_page_down(page);
 		}
+		pte_chain_unlock(page);
 
 		/*
 		 * If the page age is 'hot' and the process using the
 		 * page doesn't exceed its RSS limit we keep the page.
 		 * Otherwise we move it to the inactive_dirty list.
 		 */
+		pte_chain_lock(page);
 		if (page->age && !page_over_rsslimit(page)) {
 			list_del(page_lru);
 			list_add(page_lru, &zone->active_list);
 		} else {
 			deactivate_page_nolock(page);
-			if (++nr_deactivated > target)
+			if (++nr_deactivated > target) {
+				pte_chain_unlock(page);
 				break;
+			}
 		}
+		pte_chain_unlock(page);
 
 		/* Low latency reschedule point */
 		if (current->need_resched) {
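
For readers unfamiliar with the pattern: pte_chain_lock() above is an open-coded bit spinlock on a page flag, spinning on test_and_set_bit() and releasing with clear_bit(). Below is a minimal standalone sketch of the same idea in userspace C, using GCC's __atomic builtins and pthreads purely as stand-ins for the kernel's atomic bitops and struct page; none of the definitions below are kernel code, and cpu_relax() is modelled as a no-op.

/* build: cc -O2 chainlock-sketch.c -lpthread */
#include <stdio.h>
#include <pthread.h>

#define PG_chainlock	16

struct page { unsigned long flags; };

/* Userspace approximations of the kernel's atomic bitops. */
static int test_and_set_bit(int nr, unsigned long *addr)
{
	unsigned long mask = 1UL << nr;
	return (__atomic_fetch_or(addr, mask, __ATOMIC_ACQUIRE) & mask) != 0;
}

static void clear_bit(int nr, unsigned long *addr)
{
	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_RELEASE);
}

static void cpu_relax(void) { /* spin-wait hint; no-op in this sketch */ }

static void pte_chain_lock(struct page *page)
{
	while (test_and_set_bit(PG_chainlock, &page->flags))
		cpu_relax();
}

static void pte_chain_unlock(struct page *page)
{
	clear_bit(PG_chainlock, &page->flags);
}

static struct page page;
static long counter;

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pte_chain_lock(&page);
		counter++;		/* critical section */
		pte_chain_unlock(&page);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("counter = %ld (expected 400000)\n", counter);
	return 0;
}

The point of putting the lock in page->flags is that it costs no extra storage per page and makes the pte_chain lock per-page rather than per-zone, so two CPUs manipulating different pages no longer contend.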
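
The least obvious hunk is the one in pte_chain_alloc(), which moves spin_lock() above the freelist-empty check. Presumably this is needed because the per-zone lru lock used to serialize all callers, so the unlocked check was safe; with the per-page pte_chain lock, two CPUs working on different pages can both reach the check, and it must happen under pte_chain_freelist_lock together with the pop. A userspace model of the corrected ordering follows; the types and alloc_new_pte_chains() body are illustrative stand-ins, pte_chain_pop() is open-coded, and a pthread mutex stands in for the kernel spinlock.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins, not the kernel's definitions. */
struct pte_chain { struct pte_chain *next; void *ptep; };

typedef struct zone {
	pthread_mutex_t pte_chain_freelist_lock;
	struct pte_chain *pte_chain_freelist;
} zone_t;

/* Refill the freelist; the kernel allocates a page's worth at a time. */
static void alloc_new_pte_chains(zone_t *zone)
{
	for (int i = 0; i < 16; i++) {
		struct pte_chain *pc = calloc(1, sizeof(*pc));
		pc->next = zone->pte_chain_freelist;
		zone->pte_chain_freelist = pc;
	}
}

static struct pte_chain *pte_chain_alloc(zone_t *zone)
{
	struct pte_chain *pc;

	/*
	 * Take the lock BEFORE testing the freelist, as in the patched
	 * code: the empty-check and the pop must be atomic with respect
	 * to other CPUs now that no outer lock serializes callers.
	 */
	pthread_mutex_lock(&zone->pte_chain_freelist_lock);
	if (!zone->pte_chain_freelist)
		alloc_new_pte_chains(zone);
	pc = zone->pte_chain_freelist;	/* open-coded pte_chain_pop() */
	zone->pte_chain_freelist = pc->next;
	pthread_mutex_unlock(&zone->pte_chain_freelist_lock);
	return pc;
}

int main(void)
{
	zone_t zone = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct pte_chain *pc = pte_chain_alloc(&zone);
	printf("allocated pte_chain at %p\n", (void *)pc);
	return 0;
}

With the lock taken after the check, two threads could both observe an empty freelist and both call the refill, or one could drain the list again between the other's check and its pop; taking the lock first closes that window.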