author	Andrew Morton <akpm@osdl.org>	2004-06-23 18:53:40 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-06-23 18:53:40 -0700
commit	2332dc7870b6f40eff03df88cbb03f4ffddbd086 (patch)
tree	0ef0bd9ce2c1da83ca56ff7ba32d80b5abec1ce8 /mm
parent	acba6041ff3f5dd7b4c88e012530db7474e3cdb8 (diff)
download	history-2332dc7870b6f40eff03df88cbb03f4ffddbd086.tar.gz
[PATCH] vmscan.c scan rate fixes
We've been futzing with the scan rates of the inactive and active lists far too much, and it's still not right (Anton reports interrupt-off times of over a second).

- We have this logic in there from 2.4.early (at least) which tries to keep the inactive list 1/3rd the size of the active list.  Or something.  I really cannot see any logic behind this, so toss it out and change the arithmetic in there so that all pages on both lists have equal scan rates.

- Chunk the work up so we never hold interrupts off for more than 32 pages' worth of scanning.

- Make the per-zone scan-count accumulators unsigned long rather than atomic_t.  Mainly because atomic_t's could conceivably overflow, but also because access to these counters is racy-by-design anyway.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
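To see the batching shape in isolation, here is a minimal userspace sketch of the scheme the new shrink_zone() follows: scan credit accumulates per list, and once it reaches SWAP_CLUSTER_MAX it is drained in chunks of at most SWAP_CLUSTER_MAX pages, so no single pass touches more than one chunk's worth of pages.  struct fake_zone, scan_chunk() and the sizes in main() are illustrative stand-ins, not kernel code.

	/*
	 * Hedged sketch of the chunked per-zone scan described above.
	 * The names and zone sizes here are invented for illustration.
	 */
	#include <stdio.h>

	#define SWAP_CLUSTER_MAX 32	/* chunk size, as in the patch */

	struct fake_zone {
		unsigned long nr_active;	/* pages on the active list */
		unsigned long nr_inactive;	/* pages on the inactive list */
		unsigned long nr_scan_active;	/* accumulated scan credit */
		unsigned long nr_scan_inactive;
	};

	/* Stand-in for refill_inactive_zone()/shrink_cache(). */
	static void scan_chunk(const char *list, unsigned long nr)
	{
		printf("scanning %lu pages from the %s list\n", nr, list);
	}

	static void sketch_shrink_zone(struct fake_zone *z, int priority)
	{
		unsigned long nr_active = 0, nr_inactive = 0;

		/* Both lists earn credit at the same rate: own size >> priority. */
		z->nr_scan_active += (z->nr_active >> priority) + 1;
		if (z->nr_scan_active >= SWAP_CLUSTER_MAX) {
			nr_active = z->nr_scan_active;
			z->nr_scan_active = 0;
		}

		z->nr_scan_inactive += (z->nr_inactive >> priority) + 1;
		if (z->nr_scan_inactive >= SWAP_CLUSTER_MAX) {
			nr_inactive = z->nr_scan_inactive;
			z->nr_scan_inactive = 0;
		}

		/* Drain the credit in SWAP_CLUSTER_MAX-sized chunks. */
		while (nr_active || nr_inactive) {
			if (nr_active) {
				unsigned long nr = nr_active < SWAP_CLUSTER_MAX ?
						   nr_active : SWAP_CLUSTER_MAX;
				nr_active -= nr;
				scan_chunk("active", nr);
			}
			if (nr_inactive) {
				unsigned long nr = nr_inactive < SWAP_CLUSTER_MAX ?
						   nr_inactive : SWAP_CLUSTER_MAX;
				nr_inactive -= nr;
				scan_chunk("inactive", nr);
			}
		}
	}

	int main(void)
	{
		struct fake_zone z = { .nr_active = 1000, .nr_inactive = 3000 };

		sketch_shrink_zone(&z, 5);	/* priority 5: scan size >> 5 */
		return 0;
	}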
Diffstat (limited to 'mm')
-rw-r--r--	mm/page_alloc.c	4
-rw-r--r--	mm/vmscan.c	70
2 files changed, 33 insertions, 41 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 16d5c2af94ee02..b30e78c2a93957 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1482,8 +1482,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
zone_names[j], realsize, batch);
INIT_LIST_HEAD(&zone->active_list);
INIT_LIST_HEAD(&zone->inactive_list);
- atomic_set(&zone->nr_scan_active, 0);
- atomic_set(&zone->nr_scan_inactive, 0);
+ zone->nr_scan_active = 0;
+ zone->nr_scan_inactive = 0;
zone->nr_active = 0;
zone->nr_inactive = 0;
if (!size)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d856f4244bfcc..3fe27c1d23817d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -789,54 +789,46 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
}
/*
- * Scan `nr_pages' from this zone. Returns the number of reclaimed pages.
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
static void
shrink_zone(struct zone *zone, struct scan_control *sc)
{
- unsigned long scan_active, scan_inactive;
- int count;
-
- scan_inactive = (zone->nr_active + zone->nr_inactive) >> sc->priority;
+ unsigned long nr_active;
+ unsigned long nr_inactive;
/*
- * Try to keep the active list 2/3 of the size of the cache. And
- * make sure that refill_inactive is given a decent number of pages.
- *
- * The "scan_active + 1" here is important. With pagecache-intensive
- * workloads the inactive list is huge, and `ratio' evaluates to zero
- * all the time. Which pins the active list memory. So we add one to
- * `scan_active' just to make sure that the kernel will slowly sift
- * through the active list.
+ * Add one to `nr_to_scan' just to make sure that the kernel will
+ * slowly sift through the active list.
*/
- if (zone->nr_active >= 4*(zone->nr_inactive*2 + 1)) {
- /* Don't scan more than 4 times the inactive list scan size */
- scan_active = 4*scan_inactive;
- } else {
- unsigned long long tmp;
-
- /* Cast to long long so the multiply doesn't overflow */
-
- tmp = (unsigned long long)scan_inactive * zone->nr_active;
- do_div(tmp, zone->nr_inactive*2 + 1);
- scan_active = (unsigned long)tmp;
- }
-
- atomic_add(scan_active + 1, &zone->nr_scan_active);
- count = atomic_read(&zone->nr_scan_active);
- if (count >= SWAP_CLUSTER_MAX) {
- atomic_set(&zone->nr_scan_active, 0);
- sc->nr_to_scan = count;
- refill_inactive_zone(zone, sc);
- }
+ zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+ nr_active = zone->nr_scan_active;
+ if (nr_active >= SWAP_CLUSTER_MAX)
+ zone->nr_scan_active = 0;
+ else
+ nr_active = 0;
+
+ zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
+ nr_inactive = zone->nr_scan_inactive;
+ if (nr_inactive >= SWAP_CLUSTER_MAX)
+ zone->nr_scan_inactive = 0;
+ else
+ nr_inactive = 0;
+
+ while (nr_active || nr_inactive) {
+ if (nr_active) {
+ sc->nr_to_scan = min(nr_active,
+ (unsigned long)SWAP_CLUSTER_MAX);
+ nr_active -= sc->nr_to_scan;
+ refill_inactive_zone(zone, sc);
+ }
- atomic_add(scan_inactive, &zone->nr_scan_inactive);
- count = atomic_read(&zone->nr_scan_inactive);
- if (count >= SWAP_CLUSTER_MAX) {
- atomic_set(&zone->nr_scan_inactive, 0);
- sc->nr_to_scan = count;
- shrink_cache(zone, sc);
+ if (nr_inactive) {
+ sc->nr_to_scan = min(nr_inactive,
+ (unsigned long)SWAP_CLUSTER_MAX);
+ nr_inactive -= sc->nr_to_scan;
+ shrink_cache(zone, sc);
+ }
}
}
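As a worked example of the "equal scan rates" point, the following standalone program compares the per-pass scan credit handed out by the old ratio-based arithmetic and by the new per-list arithmetic for a hypothetical zone (100000 active pages, 300000 inactive pages, priority 6); the zone sizes and priority are invented for illustration.  Under the old scheme the active list ends up scanned at roughly half the per-page rate of the inactive list, while the new scheme gives both lists the same per-page rate.

	/*
	 * Hedged comparison of the old and new scan-credit arithmetic,
	 * mirroring the hunk above; not kernel code.
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned long nr_active = 100000, nr_inactive = 300000;
		int priority = 6;

		/* Old scheme: inactive scan sized from both lists, active
		 * scan scaled by nr_active / (2 * nr_inactive + 1). */
		unsigned long old_inactive = (nr_active + nr_inactive) >> priority;
		unsigned long old_active =
			(unsigned long)((unsigned long long)old_inactive * nr_active /
					(nr_inactive * 2 + 1)) + 1;

		/* New scheme: each list earns credit from its own size alone. */
		unsigned long new_active = (nr_active >> priority) + 1;
		unsigned long new_inactive = (nr_inactive >> priority) + 1;

		printf("old: active %lu (%.2f%%), inactive %lu (%.2f%%)\n",
		       old_active, 100.0 * old_active / nr_active,
		       old_inactive, 100.0 * old_inactive / nr_inactive);
		printf("new: active %lu (%.2f%%), inactive %lu (%.2f%%)\n",
		       new_active, 100.0 * new_active / nr_active,
		       new_inactive, 100.0 * new_inactive / nr_inactive);
		return 0;
	}

With these made-up numbers the old arithmetic yields about 1042 active-list scans per pass (roughly 1.0% of that list) against 6250 inactive-list scans (roughly 2.1%), whereas the new arithmetic yields 1563 and 4688, both about 1.6% of their respective lists.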