diff -urN linux/Documentation/vm/locking linux-wli/Documentation/vm/locking
--- linux/Documentation/vm/locking Mon Mar 19 12:35:11 2001
+++ linux-wli/Documentation/vm/locking Fri Mar 15 23:59:01 2002
@@ -70,7 +70,7 @@
 c_spinlock spinlocks. This is okay, since code that holds i_shared_lock
 never asks for memory, and the kmem code asks for pages after dropping
 c_spinlock. The page_table_lock also nests with pagecache_lock and
-pagemap_lru_lock spinlocks, and no code asks for memory with these locks
+the per-zone lru_lock spinlocks, and no code asks for memory with these locks
 held.
 
 The page_table_lock is grabbed while holding the kernel_lock spinning monitor.
diff -urN linux/include/linux/brlock.h linux-wli/include/linux/brlock.h
--- linux/include/linux/brlock.h Thu Nov 22 11:46:19 2001
+++ linux-wli/include/linux/brlock.h Sat Mar 16 00:03:37 2002
@@ -37,6 +37,7 @@
 	BR_GLOBALIRQ_LOCK,
 	BR_NETPROTO_LOCK,
 	BR_LLC_LOCK,
+	BR_PAGE_LRU,
 	__BR_END
 };
 
diff -urN linux/include/linux/mm.h linux-wli/include/linux/mm.h
--- linux/include/linux/mm.h Fri Mar 15 23:47:14 2002
+++ linux-wli/include/linux/mm.h Sat Mar 16 00:03:47 2002
@@ -158,7 +158,7 @@
 	unsigned long flags;		/* atomic flags, some possibly
 					   updated asynchronously */
 	struct list_head lru;		/* Pageout list, eg. active_list;
-					   protected by pagemap_lru_lock !! */
+					   protected by the per-zone lru_lock !! */
 	unsigned char age;		/* Page aging counter. */
 	unsigned char zone;		/* Memory zone the page belongs to. */
 	struct pte_chain * pte_chain;	/* Reverse pte mapping pointer. */
@@ -268,7 +268,7 @@
  *
  * Note that the referenced bit, the page->lru list_head and the
  * active, inactive_dirty and inactive_clean lists are protected by
- * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ * the per-zone lru_lock, and *NOT* by the usual PG_locked bit!
  *
  * PG_skip is used on sparc/sparc64 architectures to "skip" certain
  * parts of the address space.
diff -urN linux/include/linux/mm_inline.h linux-wli/include/linux/mm_inline.h
--- linux/include/linux/mm_inline.h Fri Mar 15 23:47:14 2002
+++ linux-wli/include/linux/mm_inline.h Sat Mar 16 00:04:08 2002
@@ -2,7 +2,7 @@
 #define _LINUX_VM_INLINE_H
 
 #include 
-
+#include <linux/brlock.h>
 /*
  * Knuth recommends primes in approximately golden ratio to the maximum
  * integer representable by a machine word for multiplicative hashing.
@@ -306,6 +306,39 @@
 		fixup_freespace(zone, 1);
 	} else
 		SetPageReferenced(page);
+}
+
+/*
+ * Acquires the lru list locks. lock_lru() will either provide exclusion
+ * for the specified zone or, in the case of ALL_ZONES, will provide
+ * exclusion for all zones at once.
+ * Looks expensive, but the branch is optimized away when the zone argument is a compile-time constant.
+ */
+
+static inline void lock_lru(struct zone_struct *zone)
+{
+	if (zone) {
+		br_read_lock(BR_PAGE_LRU);
+		spin_lock(&zone->lru_lock);
+	} else {
+		br_write_lock(BR_PAGE_LRU);
+	}
+}
+
+/*
+ * Releases the lru list locks. unlock_lru() *must* be called with the
+ * same argument as the corresponding lock_lru() call.
+ * Looks expensive, but the branch is optimized away when the zone argument is a compile-time constant.
+ */
+
+static inline void unlock_lru(struct zone_struct *zone)
+{
+	if (zone) {
+		spin_unlock(&zone->lru_lock);
+		br_read_unlock(BR_PAGE_LRU);
+	} else {
+		br_write_unlock(BR_PAGE_LRU);
+	}
 }
 
 #endif
diff -urN linux/include/linux/mmzone.h linux-wli/include/linux/mmzone.h
--- linux/include/linux/mmzone.h Fri Mar 15 23:47:14 2002
+++ linux-wli/include/linux/mmzone.h Sat Mar 16 00:03:40 2002
@@ -39,6 +39,7 @@
 	 * Commonly accessed fields:
 	 */
 	spinlock_t		lock;
+	spinlock_t		lru_lock;
 	unsigned long		free_pages;
 	unsigned long		active_pages;
 	unsigned long		inactive_dirty_pages;
diff -urN linux/include/linux/swap.h linux-wli/include/linux/swap.h
--- linux/include/linux/swap.h Fri Mar 15 23:47:14 2002
+++ linux-wli/include/linux/swap.h Sat Mar 16 00:03:38 2002
@@ -123,6 +123,7 @@
 extern void FASTCALL(deactivate_page(struct page *));
 extern void FASTCALL(deactivate_page_nolock(struct page *));
 extern void FASTCALL(drop_page(struct page *));
+extern void FASTCALL(drop_page_zone(struct zone_struct *zone, struct page *));
 
 extern void swap_setup(void);
 
@@ -179,8 +180,6 @@
 asmlinkage long sys_swapoff(const char *);
 asmlinkage long sys_swapon(const char *, int);
 
-extern spinlock_t pagemap_lru_lock;
-
 /*
  * Page aging defines. These seem to work great in FreeBSD,
  * no need to reinvent the wheel.
@@ -192,7 +191,7 @@
 
 /*
  * List add/del helper macros. These must be called
- * with the pagemap_lru_lock held!
+ * with the per-zone lru_lock held!
 */
 #define DEBUG_LRU_PAGE(page) \
 do { \
diff -urN linux/mm/filemap.c linux-wli/mm/filemap.c
--- linux/mm/filemap.c Fri Mar 15 23:47:15 2002
+++ linux-wli/mm/filemap.c Fri Mar 15 23:59:01 2002
@@ -56,9 +56,8 @@
 
 /*
  * Lock ordering:
- *	pagemap_lru_lock ==> page_lock ==> i_shared_lock
+ *	lru locks ==> page_lock ==> i_shared_lock
 */
-spinlock_t pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 /*
  * Remove a page from the page cache and free it. Caller has to make
@@ -137,7 +136,7 @@
 
 	head = &mapping->clean_pages;
 
-	spin_lock(&pagemap_lru_lock);
+	lock_lru(ALL_ZONES);
 	spin_lock(&mapping->page_lock);
 	curr = head->next;
 
@@ -170,7 +169,7 @@
 	}
 	spin_unlock(&mapping->page_lock);
-	spin_unlock(&pagemap_lru_lock);
+	unlock_lru(ALL_ZONES);
 }
 
 static int do_flushpage(struct page *page, unsigned long offset)
@@ -976,7 +975,7 @@
 	 * been increased since the last time we were called, we
 	 * stop when the page isn't there.
 	 */
-	spin_lock(&pagemap_lru_lock);
+	lock_lru(ALL_ZONES);
 	while (--index >= start) {
 		spin_lock(&mapping->page_lock);
 		page = radix_tree_lookup(&mapping->page_tree, index);
@@ -985,7 +984,7 @@
 			break;
 		drop_page(page);
 	}
-	spin_unlock(&pagemap_lru_lock);
+	unlock_lru(ALL_ZONES);
 }
 
 /* Same as grab_cache_page, but do not wait if the page is unavailable.
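The mm_inline.h interface above is the core of the conversion: a caller working on
one zone takes the BR_PAGE_LRU big-reader lock for read plus that zone's lru_lock,
while a caller that needs every LRU list stable at once takes the big-reader lock
for write, as the filemap.c paths converted to ALL_ZONES do. A minimal sketch of
the two usage forms follows. It is not part of the patch; count_zone_lru() and
count_all_lru() are hypothetical helpers, and ALL_ZONES is assumed to be a NULL
zone pointer defined elsewhere in the patch.

/*
 * Sketch only, not part of the patch. Hypothetical helpers illustrating
 * per-zone (read-side) versus all-zones (write-side) use of lock_lru().
 * ALL_ZONES is assumed to be a NULL zone pointer.
 */
#include <linux/mmzone.h>       /* struct zone_struct, pg_data_t */
#include <linux/mm_inline.h>    /* lock_lru()/unlock_lru() added by this patch */

static unsigned long count_zone_lru(struct zone_struct *zone)
{
        unsigned long pages;

        /* Read side: other zones' LRU lists remain usable concurrently. */
        lock_lru(zone);
        pages = zone->active_pages + zone->inactive_dirty_pages +
                zone->inactive_clean_pages;
        unlock_lru(zone);

        return pages;
}

static unsigned long count_all_lru(pg_data_t *pgdat)
{
        unsigned long pages = 0;
        int i;

        /* Write side: every per-zone LRU user is excluded at once. */
        lock_lru(ALL_ZONES);
        for (i = 0; i < MAX_NR_ZONES; i++) {
                struct zone_struct *zone = pgdat->node_zones + i;

                pages += zone->active_pages + zone->inactive_dirty_pages +
                         zone->inactive_clean_pages;
        }
        unlock_lru(ALL_ZONES);

        return pages;
}

The point of the split is that the hot per-zone paths stay cheap, one shared
big-reader acquisition plus one private spinlock, while the rare cross-zone
paths keep the old single-lock semantics.
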
diff -urN linux/mm/page_alloc.c linux-wli/mm/page_alloc.c --- linux/mm/page_alloc.c Fri Mar 15 23:47:15 2002 +++ linux-wli/mm/page_alloc.c Fri Mar 15 23:59:01 2002 @@ -954,6 +954,7 @@ INIT_LIST_HEAD(&zone->active_list); INIT_LIST_HEAD(&zone->inactive_dirty_list); INIT_LIST_HEAD(&zone->inactive_clean_list); + spin_lock_init(&zone->lru_lock); if (!size) continue; diff -urN linux/mm/rmap.c linux-wli/mm/rmap.c --- linux/mm/rmap.c Fri Mar 15 23:47:15 2002 +++ linux-wli/mm/rmap.c Fri Mar 15 23:59:01 2002 @@ -13,7 +13,7 @@ /* * Locking: - * - the page->pte_chain is protected by the pagemap_lru_lock, + * - the page->pte_chain is protected by the per zone lru lock, * we probably want to change this to a per-page lock in the * future * - because swapout locking is opposite to the locking order @@ -21,6 +21,7 @@ * on the mm->page_table_lock */ #include +#include #include #include @@ -59,7 +60,7 @@ * * Quick test_and_clear_referenced for all mappings to a page, * returns the number of processes which referenced the page. - * Caller needs to hold the pagemap_lru_lock. + * Caller needs to hold the lru locks. */ int FASTCALL(page_referenced(struct page *)); int page_referenced(struct page * page) @@ -95,7 +96,7 @@ if (!VALID_PAGE(page) || PageReserved(page)) return; - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); #ifdef DEBUG_RMAP if (!page || !ptep) BUG(); @@ -118,7 +119,7 @@ pte_chain->next = page->pte_chain; page->pte_chain = pte_chain; - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); } /** @@ -140,7 +141,7 @@ if (!VALID_PAGE(page) || PageReserved(page)) return; - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) { if (pc->ptep == ptep) { pte_chain_free(pc, prev_pc, page); @@ -158,7 +159,7 @@ #endif out: - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); return; } @@ -172,7 +173,7 @@ * table entry mapping a page. Because locking order here is opposite * to the locking order used by the page fault path, we use trylocks. * Locking: - * pagemap_lru_lock page_launder() + * lru_lock page_launder() * page lock page_launder(), trylock * mm->page_table_lock try_to_unmap_one(), trylock */ @@ -238,7 +239,7 @@ * @page: the page to get unmapped * * Tries to remove all the page table entries which are mapping this - * page, used in the pageout path. Caller must hold pagemap_lru_lock + * page, used in the pageout path. Caller must hold the lru locks * and the page lock. Return values are: * * SWAP_SUCCESS - we succeeded in removing all mappings @@ -289,7 +290,7 @@ * we make the optimisation of only checking the first process * in the pte_chain list, this should catch hogs while not * evicting pages shared by many processes. - * The caller needs to hold the pagemap_lru_lock. + * The caller needs to hold the lru locks. */ int FASTCALL(page_over_rsslimit(struct page *)); int page_over_rsslimit(struct page * page) @@ -317,7 +318,7 @@ * This function unlinks pte_chain from the singly linked list it * may be on and adds the pte_chain to the free list. May also be * called for new pte_chain structures which aren't on any list yet. - * Caller needs to hold the pagemap_lru_list. + * Caller needs to hold the lru lock. */ static inline void pte_chain_free(struct pte_chain * pte_chain, struct pte_chain * prev_pte_chain, struct page * page) { @@ -336,7 +337,7 @@ * * Returns a pointer to a fresh pte_chain structure. Allocates new * pte_chain structures as required. - * Caller needs to hold the pagemap_lru_lock. 
+ * Caller needs to hold the lru locks. */ static inline struct pte_chain * pte_chain_alloc(void) { diff -urN linux/mm/swap.c linux-wli/mm/swap.c --- linux/mm/swap.c Fri Mar 15 23:47:15 2002 +++ linux-wli/mm/swap.c Fri Mar 15 23:59:01 2002 @@ -50,7 +50,6 @@ /** * (de)activate_page - move pages from/to active and inactive lists * @page: the page we want to move - * @nolock - are we already holding the pagemap_lru_lock? * * Deactivate_page will move an active page to the right * inactive list, while activate_page will move a page back @@ -76,9 +75,53 @@ void FASTCALL(deactivate_page(struct page *)); void deactivate_page(struct page * page) { - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); deactivate_page_nolock(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); +} + +/** + * drop_page_zone - like deactivate_page, but try inactive_clean list + * @zone: zone for the page + * @page: the page to drop + * + * Try to move a page to the inactive_clean list, this succeeds if the + * page is clean and not in use by anybody. If the page cannot be placed + * on the inactive_clean list it is placed on the inactive_dirty list + * instead. + * + * Note: this function gets called with the zone specific lru lock held. + */ +void FASTCALL(drop_page_zone(struct zone_struct *zone, struct page *)); +void drop_page_zone(struct zone_struct *zone, struct page * page) +{ + if (!TryLockPage(page)) { + if (page->mapping && page->buffers) { + page_cache_get(page); + unlock_lru(zone); + try_to_release_page(page, GFP_NOIO); + lock_lru(zone); + page_cache_release(page); + } + UnlockPage(page); + } + + /* Make sure the page really is reclaimable. */ + if (!page->mapping || PageDirty(page) || page->pte_chain || + page->buffers || page_count(page) > 1) + deactivate_page_nolock(page); + + else if (page_count(page) == 1) { + ClearPageReferenced(page); + page->age = 0; + if (PageActive(page)) { + del_page_from_active_list(page); + add_page_to_inactive_clean_list(page); + } else if (PageInactiveDirty(page)) { + del_page_from_inactive_dirty_list(page); + add_page_to_inactive_clean_list(page); + } + } } /** @@ -90,7 +133,7 @@ * on the inactive_clean list it is placed on the inactive_dirty list * instead. * - * Note: this function gets called with the pagemap_lru_lock held. + * Note: this function gets called with the ALL_ZONES lru lock held. */ void FASTCALL(drop_page(struct page *)); void drop_page(struct page * page) @@ -98,9 +141,9 @@ if (!TryLockPage(page)) { if (page->mapping && page->buffers) { page_cache_get(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(ALL_ZONES); try_to_release_page(page, GFP_NOIO); - spin_lock(&pagemap_lru_lock); + lock_lru(ALL_ZONES); page_cache_release(page); } UnlockPage(page); @@ -145,9 +188,9 @@ void FASTCALL(activate_page(struct page *)); void activate_page(struct page * page) { - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); activate_page_nolock(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); } /** @@ -158,9 +201,9 @@ void lru_cache_add(struct page * page) { if (!PageLRU(page)) { - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); add_page_to_active_list(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); } } @@ -169,7 +212,7 @@ * @page: the page to add * * This function is for when the caller already holds - * the pagemap_lru_lock. + * the lru locks. 
*/ void FASTCALL(__lru_cache_del(struct page *)); void __lru_cache_del(struct page * page) @@ -190,9 +233,9 @@ void FASTCALL(lru_cache_del(struct page *)); void lru_cache_del(struct page * page) { - spin_lock(&pagemap_lru_lock); + lock_lru(page_zone(page)); __lru_cache_del(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(page_zone(page)); } /* diff -urN linux/mm/vmscan.c linux-wli/mm/vmscan.c --- linux/mm/vmscan.c Fri Mar 15 23:47:15 2002 +++ linux-wli/mm/vmscan.c Fri Mar 15 23:59:01 2002 @@ -88,7 +88,7 @@ * We need to hold the page_lock around all tests to make sure * reclaim_page() cannot race with find_get_page() and friends. */ - spin_lock(&pagemap_lru_lock); + lock_lru(zone); maxscan = zone->inactive_clean_pages; while (maxscan-- && !list_empty(&zone->inactive_clean_list)) { page_lru = zone->inactive_clean_list.prev; @@ -141,13 +141,13 @@ unlock: spin_unlock(&mapping->page_lock); } - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); return NULL; found_page: del_page_from_inactive_clean_list(page); spin_unlock(&mapping->page_lock); - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); if (entry.val) swap_free(entry); UnlockPage(page); @@ -213,16 +213,16 @@ cleaned_pages = 0; /* The main launder loop. */ - spin_lock(&pagemap_lru_lock); + lock_lru(zone); maxscan = zone->inactive_dirty_pages >> priority; while (maxscan-- && !list_empty(&zone->inactive_dirty_list)) { struct page * page; /* Low latency reschedule point */ if (current->need_resched) { - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); schedule(); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); continue; } @@ -277,16 +277,16 @@ */ if (page->pte_chain && !page->mapping && !page->buffers) { page_cache_get(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); if (!add_to_swap(page)) { activate_page(page); UnlockPage(page); page_cache_release(page); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); continue; } page_cache_release(page); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); } /* @@ -322,12 +322,12 @@ ClearPageDirty(page); SetPageLaunder(page); page_cache_get(page); - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); writepage(page); page_cache_release(page); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); continue; } } @@ -338,7 +338,7 @@ * the page as well. */ if (page->buffers) { - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); /* To avoid freeing our page before we're done. */ page_cache_get(page); @@ -351,7 +351,7 @@ * the LRU, so we unlock the page after * taking the lru lock */ - spin_lock(&pagemap_lru_lock); + lock_lru(zone); UnlockPage(page); __lru_cache_del(page); @@ -369,14 +369,14 @@ */ page_cache_release(page); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); } } else { /* failed to drop the buffers so stop here */ UnlockPage(page); page_cache_release(page); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); continue; } } @@ -409,7 +409,7 @@ UnlockPage(page); } } - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); /* Return the number of pages moved to the inactive_clean list. */ return cleaned_pages; @@ -459,7 +459,7 @@ struct page * page; /* Take the lock while messing with the list... */ - spin_lock(&pagemap_lru_lock); + lock_lru(zone); while (maxscan-- && !list_empty(&zone->active_list)) { page_lru = zone->active_list.prev; page = list_entry(page_lru, struct page, lru); @@ -478,7 +478,7 @@ * while on the inactive_clean list it'll be reactivated. 
*/ if (!page_mapping_inuse(page)) { - drop_page(page); + drop_page_zone(zone, page); continue; } @@ -507,12 +507,12 @@ /* Low latency reschedule point */ if (current->need_resched) { - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); schedule(); - spin_lock(&pagemap_lru_lock); + lock_lru(zone); } } - spin_unlock(&pagemap_lru_lock); + unlock_lru(zone); return nr_deactivated; }
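
Several hunks in page_launder() and refill_inactive() above repeat the same
low-latency reschedule sequence: drop the zone's LRU lock, call schedule(), and
re-take the lock. The sketch below factors that pattern into a helper; it is not
part of the patch and lru_cond_resched() is a hypothetical name. It also restates
the rule from mm_inline.h: unlock_lru() must be given the same zone as the
matching lock_lru(), and because the lock is dropped across schedule(), any list
position the caller held must be revalidated afterwards.

/*
 * Sketch only, not part of the patch. Hypothetical helper wrapping the
 * reschedule pattern open-coded in page_launder() and refill_inactive().
 */
#include <linux/sched.h>        /* current, schedule() */
#include <linux/mmzone.h>       /* struct zone_struct */
#include <linux/mm_inline.h>    /* lock_lru()/unlock_lru() added by this patch */

static inline void lru_cond_resched(struct zone_struct *zone)
{
        if (current->need_resched) {
                /* Must re-take the lock with the very same zone argument. */
                unlock_lru(zone);
                schedule();
                lock_lru(zone);
        }
}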