diff -urN vm-ref/mm/page_alloc.c vm/mm/page_alloc.c
--- vm-ref/mm/page_alloc.c	Tue Sep 18 21:46:07 2001
+++ vm/mm/page_alloc.c	Tue Sep 18 15:39:50 2001
@@ -255,7 +255,7 @@
 	local_pages = &current->local_pages;
 
-	if (__freed) {
+	if (__builtin_expect(__freed, 1)) {
 		/* pick from the last inserted so we're lifo */
 		entry = local_pages->next;
 		do {
@@ -380,13 +380,15 @@
 			if (!z)
 				break;
 
-			if (zone_free_pages(z, order) > (gfp_mask & __GFP_HIGH ? z->pages_min / 2 : z->pages_min)) {
-				page = rmqueue(z, order);
-				if (page)
-					return page;
-			}
+			page = rmqueue(z, order);
+			if (page)
+				return page;
 		}
 	} else {
+		/*
+		 * Check whether some other task has been killed in the
+		 * meanwhile; in such a case the allocation can succeed.
+		 */
 		for (;;) {
 			zone_t *z = *(zone++);
 			if (!z)
@@ -733,6 +735,7 @@
 		zone->lock = SPIN_LOCK_UNLOCKED;
 		zone->zone_pgdat = pgdat;
 		zone->free_pages = 0;
+		zone->need_balance = 0;
 		if (!size)
 			continue;
diff -urN vm-ref/mm/page_io.c vm/mm/page_io.c
--- vm-ref/mm/page_io.c	Tue Sep 18 21:46:07 2001
+++ vm/mm/page_io.c	Tue Sep 18 18:04:21 2001
@@ -78,7 +78,15 @@
 	if (!wait) {
 		SetPageDecrAfter(page);
 		atomic_inc(&nr_async_pages);
-	}
+	} else
+		/*
+		 * Must hold a reference until after wait_on_page()
+		 * has returned, or the page could be freed by the VM
+		 * once I/O is completed and the page has been unlocked.
+		 * The asynchronous path is fine since it never
+		 * references the page after brw_page().
+		 */
+		page_cache_get(page);
 
 	/* block_size == PAGE_SIZE/zones_used */
 	brw_page(rw, page, dev, zones, block_size);
@@ -94,6 +102,7 @@
 	/* This shouldn't happen, but check to be sure. */
 	if (page_count(page) == 0)
 		printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
+	page_cache_release(page);
 
 	return 1;
 }
diff -urN vm-ref/mm/swap.c vm/mm/swap.c
--- vm-ref/mm/swap.c	Tue Sep 18 21:46:07 2001
+++ vm/mm/swap.c	Tue Sep 18 21:23:36 2001
@@ -54,6 +54,7 @@
 		del_page_from_active_list(page);
 		add_page_to_inactive_list(page);
 	}
+	ClearPageReferenced(page);
 }
 
 void deactivate_page(struct page * page)
@@ -72,6 +73,7 @@
 		del_page_from_inactive_list(page);
 		add_page_to_active_list(page);
 	}
+	SetPageReferenced(page);
 }
 
 void activate_page(struct page * page)
diff -urN vm-ref/mm/vmscan.c vm/mm/vmscan.c
--- vm-ref/mm/vmscan.c	Tue Sep 18 21:46:07 2001
+++ vm/mm/vmscan.c	Tue Sep 18 21:23:49 2001
@@ -47,20 +47,24 @@
 {
 	pte_t pte;
 	swp_entry_t entry;
+	int right_classzone;
 
 	/* Don't look at this pte if it's been accessed recently. */
 	if (ptep_test_and_clear_young(page_table)) {
 		flush_tlb_page(vma, address);
-		SetPageReferenced(page);
+		activate_page(page);
 		return 0;
 	}
-
-	if (!memclass(page->zone, classzone))
+	if ((PageInactive(page) || PageActive(page)) && PageReferenced(page))
 		return 0;
 
 	if (TryLockPage(page))
 		return 0;
 
+	right_classzone = 1;
+	if (!memclass(page->zone, classzone))
+		right_classzone = 0;
+
 	/* From this point on, the odds are that we're going to
 	 * nuke this pte, so read and clear the pte. This hook
 	 * is needed on CPUs which update the accessed and dirty
@@ -90,7 +94,7 @@
 
 		if (freeable)
 			deactivate_page(page);
 		page_cache_release(page);
-		return freeable;
+		return freeable & right_classzone;
 	}
 }
 
@@ -293,8 +297,10 @@
 	/* Then, look at the other mm's */
 	counter = mmlist_nr / priority;
 	do {
-		if (current->need_resched)
+		if (__builtin_expect(current->need_resched, 0)) {
+			__set_current_state(TASK_RUNNING);
 			schedule();
+		}
 
 		spin_lock(&mmlist_lock);
 		mm = swap_mm;
@@ -324,20 +330,19 @@
 	return 0;
 }
 
-static int FASTCALL(shrink_cache(struct list_head * lru, int * max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask));
-static int shrink_cache(struct list_head * lru, int * max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask)
+static int FASTCALL(shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask));
+static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask)
 {
-	LIST_HEAD(active_local_lru);
-	LIST_HEAD(inactive_local_lru);
 	struct list_head * entry;
 	int __max_scan = *max_scan;
 
 	spin_lock(&pagemap_lru_lock);
-	while (__max_scan && (entry = lru->prev) != lru) {
+	while (__max_scan && this_max_scan && (entry = lru->prev) != lru) {
 		struct page * page;
 
 		if (__builtin_expect(current->need_resched, 0)) {
 			spin_unlock(&pagemap_lru_lock);
+			__set_current_state(TASK_RUNNING);
 			schedule();
 			spin_lock(&pagemap_lru_lock);
 			continue;
@@ -348,21 +353,33 @@
 		if (__builtin_expect(!PageInactive(page) && !PageActive(page), 0))
 			BUG();
 
+		this_max_scan--;
+
 		if (PageTestandClearReferenced(page)) {
-			if (PageInactive(page)) {
-				del_page_from_inactive_list(page);
-				add_page_to_active_list(page);
-			} else if (PageActive(page)) {
+			if (!PageSwapCache(page)) {
+				if (PageInactive(page)) {
+					del_page_from_inactive_list(page);
+					add_page_to_active_list(page);
+				} else if (PageActive(page)) {
+					list_del(entry);
+					list_add(entry, &active_list);
+				} else
+					BUG();
+			} else {
 				list_del(entry);
-				list_add(entry, &active_list);
-			} else
-				BUG();
+				list_add(entry, lru);
+			}
 			continue;
 		}
 
-		deactivate_page_nolock(page);
-		list_del(entry);
-		list_add_tail(entry, &inactive_local_lru);
+		if (PageInactive(page)) {
+			/* just roll it over, no need to update any stat */
+			list_del(entry);
+			list_add(entry, &inactive_list);
+		} else {
+			del_page_from_active_list(page);
+			add_page_to_inactive_list(page);
+		}
 
 		if (__builtin_expect(!memclass(page->zone, classzone), 0))
 			continue;
@@ -372,8 +389,6 @@
 
 		/* Racy check to avoid trylocking when not worthwhile */
 		if (!page->buffers && page_count(page) != 1) {
 			activate_page_nolock(page);
-			list_del(entry);
-			list_add_tail(entry, &active_local_lru);
 			continue;
 		}
@@ -497,29 +512,48 @@
 			continue;
 		break;
 	}
-
-	list_splice(&inactive_local_lru, &inactive_list);
-	list_splice(&active_local_lru, &active_list);
 	spin_unlock(&pagemap_lru_lock);
 
 	*max_scan = __max_scan;
 	return nr_pages;
 }
 
+static void refill_inactive(int nr_pages)
+{
+	struct list_head * entry;
+
+	spin_lock(&pagemap_lru_lock);
+	entry = active_list.prev;
+	while (nr_pages-- && entry != &active_list) {
+		struct page * page;
+
+		page = list_entry(entry, struct page, lru);
+		entry = entry->prev;
+
+		if (!page->buffers && page_count(page) != 1)
+			continue;
+
+		del_page_from_active_list(page);
+		add_page_to_inactive_list(page);
+	}
+	spin_unlock(&pagemap_lru_lock);
+}
+
 static int FASTCALL(shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages));
 static int shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages)
 {
-	int max_scan = (nr_inactive_pages + nr_active_pages / priority) / priority;
+	int max_scan = (nr_inactive_pages + nr_active_pages / DEF_PRIORITY) / priority;
 
 	nr_pages -= kmem_cache_reap(gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
 
-	nr_pages = shrink_cache(&inactive_list, &max_scan, nr_pages, classzone, gfp_mask);
+	refill_inactive(nr_pages / 2);
+	nr_pages = shrink_cache(&inactive_list, &max_scan, nr_inactive_pages, nr_pages, classzone, gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
 
-	nr_pages = shrink_cache(&active_list, &max_scan, nr_pages, classzone, gfp_mask);
+	nr_pages = shrink_cache(&active_list, &max_scan, nr_active_pages / DEF_PRIORITY, nr_pages, classzone, gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
 
@@ -532,6 +566,7 @@
 int try_to_free_pages(zone_t * classzone, unsigned int gfp_mask, unsigned int order)
 {
 	int priority = DEF_PRIORITY;
+	int ret = 0;
 
 	do {
 		int nr_pages = SWAP_CLUSTER_MAX;
@@ -539,10 +574,10 @@
 		if (nr_pages <= 0)
 			return 1;
 
-		swap_out(priority, classzone, gfp_mask, SWAP_CLUSTER_MAX);
+		ret |= swap_out(priority, classzone, gfp_mask, SWAP_CLUSTER_MAX << 2);
 	} while (--priority);
 
-	return 0;
+	return ret;
 }
 
 DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
@@ -567,12 +602,14 @@
 	for (i = pgdat->nr_zones-1; i >= 0; i--) {
 		zone = pgdat->node_zones + i;
-		if (current->need_resched)
+		if (__builtin_expect(current->need_resched, 0))
 			schedule();
 		if (!zone->need_balance)
 			continue;
 		if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
 			zone->need_balance = 0;
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ*5);
 			continue;
 		}
 		if (check_classzone_need_balance(zone))
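
A note on the __builtin_expect() conversions above: __builtin_expect(expr, expected) is the GCC builtin that tells the compiler which way a branch usually goes, so the common case can be laid out as the fall-through path. The patch open-codes the builtin directly; the wrapper macros in the minimal sketch below are only the usual way the same idiom is expressed (the likely()/unlikely() names are illustrative here, not something this patch adds), and the sketch compiles as a standalone program with gcc:

#include <stdio.h>

/* Common wrappers around the GCC branch-prediction hint. */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

static int alloc_path(int freed)
{
	if (likely(freed)) {
		/* expected (hot) path: laid out as the fall-through case */
		return 1;
	}
	/* unexpected (cold) path */
	return 0;
}

int main(void)
{
	printf("fast path taken: %d\n", alloc_path(1));
	return 0;
}

Read this way, __builtin_expect(__freed, 1) in __alloc_pages marks "pages were freed" as the common case, while __builtin_expect(current->need_resched, 0) in the scanning loops marks the reschedule as the rare one.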
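The page_cache_get()/page_cache_release() pair added to rw_swap_page() pins the page across the blocking wait_on_page(): without the extra reference the VM could free the page as soon as I/O completion unlocks it, and the later page_count() check would touch freed memory; the asynchronous path needs no extra reference because it never looks at the page after brw_page(). The standalone sketch below shows only that general "take a reference before sleeping, drop it after the last access" pattern; struct obj, obj_get() and obj_put() are hypothetical stand-ins, not kernel code:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical refcounted object standing in for struct page. */
struct obj {
	int refcount;
};

static void obj_get(struct obj *o)
{
	o->refcount++;			/* analogous to page_cache_get() */
}

static void obj_put(struct obj *o)
{
	if (--o->refcount == 0)		/* analogous to page_cache_release() */
		free(o);
}

/* Pin the object across a blocking wait so a concurrent obj_put()
 * from the completion path cannot free it underneath us. */
static void wait_and_inspect(struct obj *o)
{
	obj_get(o);
	/* ... sleep here until I/O completes (the wait_on_page() step) ... */
	if (o->refcount == 0)
		printf("unreachable: our reference keeps the object alive\n");
	obj_put(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	o->refcount = 1;
	wait_and_inspect(o);
	obj_put(o);			/* drop the last reference, freeing it */
	return 0;
}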