diff -urN vm-ref/fs/buffer.c vm/fs/buffer.c
--- vm-ref/fs/buffer.c	Tue Oct 16 06:50:28 2001
+++ vm/fs/buffer.c	Tue Oct 16 06:50:42 2001
@@ -2388,11 +2388,8 @@
 			ll_rw_block(WRITE, 1, &p);
 			tryagain = 0;
 		} else if (buffer_locked(p)) {
-			if (gfp_mask & __GFP_WAIT) {
-				wait_on_buffer(p);
-				tryagain = 1;
-			} else
-				tryagain = 0;
+			wait_on_buffer(p);
+			tryagain = 1;
 		}
 	} else
 		tryagain = 0;
diff -urN vm-ref/mm/highmem.c vm/mm/highmem.c
--- vm-ref/mm/highmem.c	Tue Oct 16 06:50:28 2001
+++ vm/mm/highmem.c	Tue Oct 16 06:50:40 2001
@@ -328,7 +328,6 @@
 	struct list_head *tmp;
 	struct page *page;
 
-repeat_alloc:
 	page = alloc_page(GFP_NOHIGHIO);
 	if (page)
 		return page;
@@ -338,6 +337,7 @@
 	 */
 	wakeup_bdflush();
 
+repeat_alloc:
 	/*
 	 * Try to allocate from the emergency pool.
 	 */
@@ -366,7 +366,6 @@
 	struct list_head *tmp;
 	struct buffer_head *bh;
 
-repeat_alloc:
 	bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO);
 	if (bh)
 		return bh;
@@ -376,6 +375,7 @@
 	 */
 	wakeup_bdflush();
 
+repeat_alloc:
 	/*
 	 * Try to allocate from the emergency pool.
 	 */
diff -urN vm-ref/mm/page_alloc.c vm/mm/page_alloc.c
--- vm-ref/mm/page_alloc.c	Tue Oct 16 06:50:28 2001
+++ vm/mm/page_alloc.c	Tue Oct 16 06:50:42 2001
@@ -390,8 +390,7 @@
 				return page;
 			}
 		}
-		if (!order)
-			goto rebalance;
+		goto rebalance;
 	} else {
 		/*
 		 * Check that no other task is been killed meanwhile,
@@ -537,13 +536,20 @@
 
 		zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
 		zonep = zonelist->zones;
-		for (zone = *zonep++; zone; zone = *zonep++)
-			sum += zone->free_pages;
+		zone = *zonep;
+		if (zone) {
+			sum += zone->nr_inactive_pages + zone->nr_active_pages;
+			do {
+				sum += zone->free_pages;
+				zonep++;
+				zone = *zonep;
+			} while (zone);
+		}
 
 		pgdat = pgdat->node_next;
 	} while (pgdat);
 
-	return sum + nr_active_pages + nr_inactive_pages;
+	return sum;
 }
 
 #if CONFIG_HIGHMEM
diff -urN vm-ref/mm/swap.c vm/mm/swap.c
--- vm-ref/mm/swap.c	Tue Oct 16 06:50:28 2001
+++ vm/mm/swap.c	Tue Oct 16 06:50:42 2001
@@ -54,7 +54,6 @@
 		del_page_from_active_list(page);
 		add_page_to_inactive_list(page);
 	}
-	ClearPageReferenced(page);
 }
 
 void deactivate_page(struct page * page)
@@ -80,7 +79,6 @@
 	spin_lock(&pagemap_lru_lock);
 	activate_page_nolock(page);
 	spin_unlock(&pagemap_lru_lock);
-	SetPageReferenced(page);
 }
 
 /**
diff -urN vm-ref/mm/vmscan.c vm/mm/vmscan.c
--- vm-ref/mm/vmscan.c	Tue Oct 16 06:50:28 2001
+++ vm/mm/vmscan.c	Tue Oct 16 06:50:42 2001
@@ -50,6 +50,7 @@
 	/* Don't look at this pte if it's been accessed recently. */
 	if (ptep_test_and_clear_young(page_table)) {
 		flush_tlb_page(vma, address);
+		mark_page_accessed(page);
 		return 0;
 	}
 
@@ -283,10 +284,10 @@
 	return count;
 }
 
-static int FASTCALL(swap_out(zone_t * classzone, unsigned int gfp_mask, int nr_pages));
-static int swap_out(zone_t * classzone, unsigned int gfp_mask, int nr_pages)
+static int FASTCALL(swap_out(zone_t * classzone, unsigned int gfp_mask));
+static int swap_out(zone_t * classzone, unsigned int gfp_mask)
 {
-	int counter;
+	int counter, nr_pages = SWAP_CLUSTER_MAX << 2;
 	struct mm_struct *mm;
 
 	/* Then, look at the other mm's */
@@ -326,10 +327,12 @@
 	return 0;
 }
 
-static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask));
-static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask)
+/* NOTE: this is not only a shrink but also a probe, see the &faliures param */
+static int FASTCALL(shrink_cache(int nr_pages, int max_scan, int * faliures, zone_t * classzone, unsigned int gfp_mask));
+static int shrink_cache(int nr_pages, int max_scan, int * faliures, zone_t * classzone, unsigned int gfp_mask)
 {
 	struct list_head * entry;
+	int __faliures = 0;
 
 	while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
 		struct page * page;
@@ -353,22 +356,32 @@
 		if (!memclass(page->zone, classzone))
 			continue;
 
-		ClearPageReferenced(page);
-		max_scan--;
+		/* Racy check to avoid trylocking when not worthwhile */
+		if (!page->buffers && page_count(page) != 1) {
+			__faliures++;
+			continue;
+		}
+
 		/*
 		 * The page is locked. IO in progress?
 		 * Move it to the back of the list.
 		 */
 		if (unlikely(TryLockPage(page))) {
+			__faliures++;
 			if (gfp_mask & __GFP_FS) {
 				if (test_and_set_bit(PG_wait_for_IO, &page->flags)) {
+					/* later retry on this page again */
+					list_del(entry);
+					list_add_tail(entry, &inactive_list);
+					page_cache_get(page);
 					spin_unlock(&pagemap_lru_lock);
 					wait_on_page(page);
 					page_cache_release(page);
 					spin_lock(&pagemap_lru_lock);
+					continue;
 				}
 			}
 			continue;
@@ -394,6 +407,7 @@
 
 				writepage(page);
 				page_cache_release(page);
+				__faliures++;
 				spin_lock(&pagemap_lru_lock);
 				continue;
 			}
@@ -443,6 +457,7 @@
 
 				UnlockPage(page);
 				page_cache_release(page);
+				__faliures++;
 				spin_lock(&pagemap_lru_lock);
 				continue;
 			}
@@ -467,6 +482,7 @@
 		if (!is_page_cache_freeable(page) || PageDirty(page)) {
 			spin_unlock(&pagecache_lock);
 			UnlockPage(page);
+			__faliures++;
 			continue;
 		}
 
@@ -494,6 +510,7 @@
 	}
 	spin_unlock(&pagemap_lru_lock);
 
+	*faliures = __faliures;
 	return nr_pages;
 }
 
@@ -519,14 +536,14 @@
 		if (!memclass(page->zone, classzone))
 			continue;
 
-		nr_pages--;
-
 		if (PageTestandClearReferenced(page)) {
 			list_del(&page->lru);
 			list_add(&page->lru, &active_list);
 			continue;
 		}
 
+		nr_pages--;
+
 		del_page_from_active_list(page);
 		add_page_to_inactive_list(page);
 		SetPageReferenced(page);
@@ -537,11 +554,11 @@
 	}
 }
 
-static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages));
-static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages)
+
+static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * force_paging));
+static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * force_paging)
 {
-	int max_scan;
-	int chunk_size = nr_pages;
+	int max_scan, orig_nr_pages = nr_pages, faliures;
 	unsigned long ratio;
 
 	nr_pages -= kmem_cache_reap(gfp_mask);
@@ -549,21 +566,19 @@
 		return 0;
 
 	spin_lock(&pagemap_lru_lock);
-	nr_pages = chunk_size;
 	/* try to keep the active list 2/3 of the size of the cache */
-	ratio = (unsigned long) nr_pages * classzone->nr_active_pages / ((classzone->nr_inactive_pages * 2) + 1);
-	/* allow the active cache to grow */
-	if (ratio > nr_pages)
-		ratio = nr_pages;
+	ratio = (unsigned long) orig_nr_pages * classzone->nr_active_pages / ((classzone->nr_inactive_pages * 2) + 1);
+	if (ratio > orig_nr_pages * DEF_PRIORITY)
+		ratio = orig_nr_pages * DEF_PRIORITY;
 	refill_inactive(ratio, classzone);
 
 	max_scan = classzone->nr_inactive_pages / DEF_PRIORITY;
-	nr_pages = shrink_cache(nr_pages, max_scan, classzone, gfp_mask);
-	if (nr_pages <= 0)
-		return 0;
+	nr_pages = shrink_cache(orig_nr_pages, max_scan, &faliures, classzone, gfp_mask);
 
-	shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
-	shrink_icache_memory(DEF_PRIORITY, gfp_mask);
+	/* Here we find when it's time to do paging */
+	*force_paging = 0;
+	if (faliures > max_scan / DEF_PRIORITY)
+		*force_paging = 1;
 
 	return nr_pages;
 }
@@ -575,15 +590,19 @@
 	int ret = 0;
 
 	for (;;) {
-		int tries = DEF_PRIORITY << 1;
+		int tries = DEF_PRIORITY << 2;
 		int nr_pages = SWAP_CLUSTER_MAX;
 
 		do {
-			nr_pages = shrink_caches(classzone, gfp_mask, nr_pages);
+			int force_paging;
+
+			nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &force_paging);
+			if (force_paging || nr_pages > 0)
+				ret |= swap_out(classzone, gfp_mask);
 			if (nr_pages <= 0)
				return 1;
-
-			ret |= swap_out(classzone, gfp_mask, SWAP_CLUSTER_MAX << 2);
+			shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
+			shrink_icache_memory(DEF_PRIORITY, gfp_mask);
 		} while (--tries);
 
 		if (likely(ret))