From: Nick Piggin

This patch helps high kbuild loads (swapping) significantly. It also takes
2-3 seconds off a single-threaded, non-swapping `make bzImage' on a 64MB
system, and improves light and medium swapping performance as well.

* Improve precision in shrink_slab by doing the multiply before the divide.

* Calculate nr_scanned correctly instead of approximating it with max_scan.

* In shrink_cache, loop again if (nr_taken == 0) or (nr_freed <= 0 &&
  list_empty(&page_list)) instead of terminating. Use max_scan to determine
  termination.

* In shrink_zone, scan the active list more aggressively at low and medium
  imbalances. This gives improvements to kbuild at no and low swapping
  loads. (A standalone sketch of the new imbalance heuristic is appended
  after the patch.)

* Scan the active list more aggressively at high loads, but cap the amount
  of scanning that can be done. This helps high swapping loads.

* The more aggressive scanning helps by making better use of the inactive
  list to provide reclaim information.

* Calculate max_scan after we have refilled the inactive list.

* In try_to_free_pages, keep putting pressure on the slab even if we have
  already reclaimed enough pages from the LRU.

---

 mm/vmscan.c |  137 ++++++++++++++++++++++++++++--------------------------------
 1 files changed, 66 insertions(+), 71 deletions(-)

diff -puN mm/vmscan.c~vm-shrink-zone mm/vmscan.c
--- 25/mm/vmscan.c~vm-shrink-zone	2004-02-04 02:34:24.000000000 -0800
+++ 25-akpm/mm/vmscan.c	2004-02-04 02:34:24.000000000 -0800
@@ -135,7 +135,7 @@ EXPORT_SYMBOL(remove_shrinker);
  *
  * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
  */
-static int shrink_slab(long scanned, unsigned int gfp_mask)
+static int shrink_slab(unsigned long scanned, unsigned int gfp_mask)
 {
 	struct shrinker *shrinker;
 	long pages;
@@ -147,7 +147,7 @@ static int shrink_slab(long scanned, uns
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 
-		delta = 4 * (scanned / shrinker->seeks);
+		delta = 4 * scanned / shrinker->seeks;
 		delta *= (*shrinker->shrinker)(0, gfp_mask);
 		do_div(delta, pages + 1);
 		shrinker->nr += delta;
@@ -243,8 +243,7 @@ static void handle_write_error(struct ad
  * shrink_list returns the number of reclaimed pages
  */
 static int
-shrink_list(struct list_head *page_list, unsigned int gfp_mask,
-		int *max_scan, int *nr_mapped)
+shrink_list(struct list_head *page_list, unsigned int gfp_mask, int *nr_mapped)
 {
 	struct address_space *mapping;
 	LIST_HEAD(ret_pages);
@@ -479,13 +478,15 @@ keep:
  */
 static int
 shrink_cache(const int nr_pages, struct zone *zone,
-		unsigned int gfp_mask, int max_scan, int *nr_mapped)
+		unsigned int gfp_mask, int max_scan, int *nr_scanned)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
 	int nr_to_process;
 	int ret = 0;
 
+	*nr_scanned = 0;
+
 	/*
 	 * Try to ensure that we free `nr_pages' pages in one pass of the loop.
 	 */
@@ -496,8 +497,9 @@ shrink_cache(const int nr_pages, struct
 	pagevec_init(&pvec, 1);
 
 	lru_add_drain();
+again:
 	spin_lock_irq(&zone->lru_lock);
-	while (max_scan > 0 && ret < nr_pages) {
+	while (*nr_scanned < max_scan && ret < nr_pages) {
 		struct page *page;
 		int nr_taken = 0;
 		int nr_scan = 0;
@@ -527,18 +529,19 @@ shrink_cache(const int nr_pages, struct
 		zone->pages_scanned += nr_taken;
 		spin_unlock_irq(&zone->lru_lock);
 
+		*nr_scanned += nr_scan;
 		if (nr_taken == 0)
-			goto done;
+			goto again;
 
-		max_scan -= nr_scan;
 		mod_page_state(pgscan, nr_scan);
-		nr_freed = shrink_list(&page_list, gfp_mask,
-					&max_scan, nr_mapped);
+		nr_freed = shrink_list(&page_list, gfp_mask, nr_scanned);
 		ret += nr_freed;
+
 		if (nr_freed <= 0 && list_empty(&page_list))
-			goto done;
+			goto again;
 
 		spin_lock_irq(&zone->lru_lock);
+
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -559,7 +562,6 @@ shrink_cache(const int nr_pages, struct
 		}
 	}
 	spin_unlock_irq(&zone->lru_lock);
-done:
 	pagevec_release(&pvec);
 	return ret;
 }
@@ -568,9 +570,8 @@ done:
 /* move pages from @page_list to the @spot, that should be somewhere on the
  * @zone->active_list */
 static int
-spill_on_spot(struct zone *zone,
-		struct list_head *page_list, struct list_head *spot,
-		struct pagevec *pvec)
+spill_on_spot(struct zone *zone, struct list_head *page_list,
+		struct list_head *spot, struct pagevec *pvec)
 {
 	struct page *page;
 	int moved;
@@ -791,41 +792,47 @@ refill_inactive_zone(struct zone *zone,
  * direct reclaim.
 */
 static int
-shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
-	const int nr_pages, int *nr_mapped, struct page_state *ps)
+shrink_zone(struct zone *zone, unsigned int gfp_mask,
+	int nr_pages, int *nr_scanned, struct page_state *ps, int priority)
 {
-	unsigned long ratio;
+	unsigned long imbalance;
+	unsigned long nr_refill_inact;
+	unsigned long max_scan;
 
 	/*
 	 * Try to keep the active list 2/3 of the size of the cache. And
 	 * make sure that refill_inactive is given a decent number of pages.
 	 *
-	 * The "ratio+1" here is important. With pagecache-intensive workloads
-	 * the inactive list is huge, and `ratio' evaluates to zero all the
-	 * time. Which pins the active list memory. So we add one to `ratio'
-	 * just to make sure that the kernel will slowly sift through the
-	 * active list.
+	 * Keeping imbalance > 0 is important. With pagecache-intensive loads
+	 * the inactive list is huge, and imbalance evaluates to zero all the
+	 * time which would pin the active list memory.
 	 */
-	ratio = (unsigned long)nr_pages * zone->nr_active /
-			((zone->nr_inactive | 1) * 2);
-	atomic_add(ratio+1, &zone->refill_counter);
-	if (atomic_read(&zone->refill_counter) > SWAP_CLUSTER_MAX) {
-		int count;
-
-		/*
-		 * Don't try to bring down too many pages in one attempt.
-		 * If this fails, the caller will increase `priority' and
-		 * we'll try again, with an increased chance of reclaiming
-		 * mapped memory.
-		 */
-		count = atomic_read(&zone->refill_counter);
-		if (count > SWAP_CLUSTER_MAX * 4)
-			count = SWAP_CLUSTER_MAX * 4;
-		atomic_set(&zone->refill_counter, 0);
-		refill_inactive_zone(zone, count, ps);
+	if (zone->nr_active >= zone->nr_inactive*4)
+		/* ratio will be >= 2 */
+		imbalance = 8*nr_pages;
+	else if (zone->nr_active >= zone->nr_inactive*2)
+		/* 1 < ratio < 2 */
+		imbalance = 4*nr_pages*zone->nr_active / (zone->nr_inactive*2);
+	else
+		imbalance = nr_pages / 2;
+
+	imbalance++;
+
+	nr_refill_inact = atomic_read(&zone->refill_counter) + imbalance;
+	if (nr_refill_inact > SWAP_CLUSTER_MAX) {
+		refill_inactive_zone(zone, nr_refill_inact, ps);
+		nr_refill_inact = 0;
 	}
-	return shrink_cache(nr_pages, zone, gfp_mask,
-				max_scan, nr_mapped);
+	atomic_set(&zone->refill_counter, nr_refill_inact);
+
+	/*
+	 * Now pull pages from the inactive list
+	 */
+	max_scan = zone->nr_inactive >> priority;
+	if (max_scan < nr_pages * 2)
+		max_scan = nr_pages * 2;
+
+	return shrink_cache(nr_pages, zone, gfp_mask, max_scan, nr_scanned);
 }
 
 /*
@@ -854,8 +861,7 @@ shrink_caches(struct zone **zones, int p
 	for (i = 0; zones[i] != NULL; i++) {
 		int to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX);
 		struct zone *zone = zones[i];
-		int nr_mapped = 0;
-		int max_scan;
+		int nr_scanned;
 
 		if (zone->free_pages < zone->pages_high)
 			zone->temp_priority = priority;
@@ -863,16 +869,9 @@ shrink_caches(struct zone **zones, int p
 		if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 			continue;	/* Let kswapd poll it */
 
-		/*
-		 * If we cannot reclaim `nr_pages' pages by scanning twice
-		 * that many pages then fall back to the next zone.
-		 */
-		max_scan = zone->nr_inactive >> priority;
-		if (max_scan < to_reclaim * 2)
-			max_scan = to_reclaim * 2;
-		ret += shrink_zone(zone, max_scan, gfp_mask,
-				to_reclaim, &nr_mapped, ps);
-		*total_scanned += max_scan + nr_mapped;
+		ret += shrink_zone(zone, gfp_mask,
+				to_reclaim, &nr_scanned, ps, priority);
+		*total_scanned += nr_scanned;
 		if (ret >= nr_pages)
 			break;
 	}
@@ -918,6 +917,15 @@ int try_to_free_pages(struct zone **zone
 		get_page_state(&ps);
 		nr_reclaimed += shrink_caches(zones, priority, &total_scanned,
 						gfp_mask, nr_pages, &ps);
+
+		if (zones[0] - zones[0]->zone_pgdat->node_zones < ZONE_HIGHMEM) {
+			shrink_slab(total_scanned, gfp_mask);
+			if (reclaim_state) {
+				nr_reclaimed += reclaim_state->reclaimed_slab;
+				reclaim_state->reclaimed_slab = 0;
+			}
+		}
+
 		if (nr_reclaimed >= nr_pages) {
 			ret = 1;
 			goto out;
 		}
@@ -933,13 +941,6 @@ int try_to_free_pages(struct zone **zone
 		/* Take a nap, wait for some writeback to complete */
 		blk_congestion_wait(WRITE, HZ/10);
 
-		if (zones[0] - zones[0]->zone_pgdat->node_zones < ZONE_HIGHMEM) {
-			shrink_slab(total_scanned, gfp_mask);
-			if (reclaim_state) {
-				nr_reclaimed += reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
-		}
 	}
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
@@ -987,8 +988,7 @@ static int balance_pgdat(pg_data_t *pgda
 
 		for (i = 0; i < pgdat->nr_zones; i++) {
 			struct zone *zone = pgdat->node_zones + i;
-			int nr_mapped = 0;
-			int max_scan;
+			int nr_scanned;
 			int to_reclaim;
 			int reclaimed;
 
@@ -1003,16 +1003,11 @@ static int balance_pgdat(pg_data_t *pgda
 				continue;
 			}
 			zone->temp_priority = priority;
-			max_scan = zone->nr_inactive >> priority;
-			if (max_scan < to_reclaim * 2)
-				max_scan = to_reclaim * 2;
-			if (max_scan < SWAP_CLUSTER_MAX)
-				max_scan = SWAP_CLUSTER_MAX;
-			reclaimed = shrink_zone(zone, max_scan, GFP_KERNEL,
-					to_reclaim, &nr_mapped, ps);
+			reclaimed = shrink_zone(zone, GFP_KERNEL,
+					to_reclaim, &nr_scanned, ps, priority);
 			if (i < ZONE_HIGHMEM) {
 				reclaim_state->reclaimed_slab = 0;
-				shrink_slab(max_scan + nr_mapped, GFP_KERNEL);
+				shrink_slab(nr_scanned, GFP_KERNEL);
 				reclaimed += reclaim_state->reclaimed_slab;
 			}
 			to_free -= reclaimed;
_
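
For anyone who wants to poke at the numbers outside the kernel, here is a
standalone sketch of the imbalance heuristic that shrink_zone() now uses to
decide how hard to press refill_inactive_zone().  It is an illustration only:
the helper name active_imbalance(), the plain unsigned long arguments and the
printf driver are made up for this example, and it mirrors just the arithmetic
from the hunk above, not the refill_counter atomics, the zone locking or the
LRU lists themselves.

/* sketch-imbalance.c: illustration of the shrink_zone() active-list heuristic */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32	/* the value the kernel uses here */

/*
 * Mirror of the if/else ladder added to shrink_zone().  Returns how much
 * pressure, in pages, one reclaim pass puts on the active list.  The +1
 * keeps the pressure nonzero so a huge inactive list cannot pin the
 * active list (see the rewritten comment in the patch).
 */
static unsigned long active_imbalance(unsigned long nr_active,
				      unsigned long nr_inactive,
				      unsigned long nr_pages)
{
	unsigned long imbalance;

	if (nr_active >= nr_inactive * 4)
		/* ratio will be >= 2 */
		imbalance = 8 * nr_pages;
	else if (nr_active >= nr_inactive * 2)
		/* 1 < ratio < 2 */
		imbalance = 4 * nr_pages * nr_active / (nr_inactive * 2);
	else
		imbalance = nr_pages / 2;

	return imbalance + 1;
}

int main(void)
{
	/* hypothetical zone sizes, in pages */
	unsigned long nr_active = 12000, nr_inactive = 4000;
	unsigned long nr_refill = active_imbalance(nr_active, nr_inactive,
						   SWAP_CLUSTER_MAX);

	if (nr_refill > SWAP_CLUSTER_MAX)
		printf("would call refill_inactive_zone() for %lu pages\n",
		       nr_refill);
	else
		printf("would defer, carrying %lu in refill_counter\n",
		       nr_refill);
	return 0;
}

Note how the middle branch reproduces the old nr_pages * nr_active /
(nr_inactive * 2) ratio but scaled by four, which is where the extra
active-list pressure at medium imbalance comes from; the 8*nr_pages branch
caps what a badly imbalanced zone can ask for in a single pass.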