From: Nikita Danilov

The vmscan logic at present will scan the inactive list with increasing
priority until a threshold is triggered.  At that threshold we start
unmapping pages from pagetables.

The problem is that each time someone calls into this code, the priority is
initially low, so some mapped pages will be refiled even though we really
should be unmapping them now.

Nikita's patch adds the `pressure' field to struct zone.  It is a decaying
average of the zone's memory pressure and allows us to start unmapping pages
immediately on entry to page reclaim, based on measurements which were made
in earlier reclaim attempts.


 include/linux/mmzone.h |   12 ++++++++++++
 mm/vmscan.c            |   33 +++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

diff -puN include/linux/mmzone.h~zone-pressure include/linux/mmzone.h
--- 25/include/linux/mmzone.h~zone-pressure	2003-07-27 00:13:07.000000000 -0700
+++ 25-akpm/include/linux/mmzone.h	2003-07-27 00:13:07.000000000 -0700
@@ -89,6 +89,18 @@ struct zone {
 	ZONE_PADDING(_pad2_)
 
+	/*
+	 * measure of scanning intensity for this zone. It is calculated
+	 * as exponentially decaying average of the scanning priority
+	 * required to free enough pages in this zone
+	 * (zone_adj_pressure()).
+	 *
+	 * 0 --- low pressure
+	 *
+	 * (DEF_PRIORITY << 10) --- high pressure
+	 */
+	int pressure;
+
 	/*
 	 * free areas of different sizes
 	 */
diff -puN mm/vmscan.c~zone-pressure mm/vmscan.c
--- 25/mm/vmscan.c~zone-pressure	2003-07-27 00:13:07.000000000 -0700
+++ 25-akpm/mm/vmscan.c	2003-07-27 00:13:07.000000000 -0700
@@ -80,6 +80,20 @@ static long total_memory;
 #endif
 
 /*
+ * exponentially decaying average
+ */
+static inline int expavg(int avg, int val)
+{
+	return ((val - avg) >> 1) + avg;
+}
+
+static void zone_adj_pressure(struct zone *zone, int priority)
+{
+	zone->pressure = expavg(zone->pressure,
+				(DEF_PRIORITY - priority) << 10);
+}
+
+/*
  * The list of shrinker callbacks used by to apply pressure to
  * ageable caches.
  */
@@ -794,8 +808,10 @@ shrink_caches(struct zone *classzone, in
 		ret += shrink_zone(zone, max_scan, gfp_mask,
 				to_reclaim, &nr_mapped, ps, priority);
 		*total_scanned += max_scan + nr_mapped;
-		if (ret >= nr_pages)
+		if (ret >= nr_pages) {
+			zone_adj_pressure(zone, priority);
 			break;
+		}
 	}
 	return ret;
 }
@@ -824,6 +840,7 @@ int try_to_free_pages(struct zone *cz,
 	int ret = 0;
 	const int nr_pages = SWAP_CLUSTER_MAX;
 	int nr_reclaimed = 0;
+	struct zone *zone;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 
 	inc_page_state(allocstall);
@@ -860,6 +877,8 @@ int try_to_free_pages(struct zone *cz,
 	}
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
+	for (zone = cz; zone >= cz->zone_pgdat->node_zones; -- zone)
+		zone_adj_pressure(zone, -1);
 out:
 	return ret;
 }
@@ -907,8 +926,10 @@ static int balance_pgdat(pg_data_t *pgda
 				to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
 			} else {			/* Zone balancing */
 				to_reclaim = zone->pages_high-zone->free_pages;
-				if (to_reclaim <= 0)
+				if (to_reclaim <= 0) {
+					zone_adj_pressure(zone, priority);
 					continue;
+				}
 			}
 			all_zones_ok = 0;
 			max_scan = zone->nr_inactive >> priority;
@@ -933,6 +954,14 @@ static int balance_pgdat(pg_data_t *pgda
 		if (to_free)
 			blk_congestion_wait(WRITE, HZ/10);
 	}
+	if (priority < 0) {
+		for (i = 0; i < pgdat->nr_zones; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+
+			if (zone->free_pages < zone->pages_high)
+				zone_adj_pressure(zone, -1);
+		}
+	}
 	return nr_pages - to_free;
 }
_
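
As an aside (not part of the patch): the easiest way to get a feel for the
decaying average is to run it outside the kernel.  The sketch below lifts
expavg() and the (DEF_PRIORITY - priority) << 10 scaling straight from the
patch; the struct zone, the DEF_PRIORITY value and the main() harness are
stand-ins for illustration only.

/*
 * User-space sketch of the zone pressure decaying average.
 * Only expavg() and zone_adj_pressure() mirror the patch; everything
 * else is scaffolding so the behaviour can be printed and inspected.
 */
#include <stdio.h>

#define DEF_PRIORITY 12		/* assumed value, matches mainline vmscan */

struct zone {
	int pressure;		/* stand-in for the new struct zone field */
};

/* exponentially decaying average: move avg halfway towards val */
static inline int expavg(int avg, int val)
{
	return ((val - avg) >> 1) + avg;
}

static void zone_adj_pressure(struct zone *zone, int priority)
{
	zone->pressure = expavg(zone->pressure,
				(DEF_PRIORITY - priority) << 10);
}

int main(void)
{
	struct zone z = { .pressure = 0 };
	int i;

	/*
	 * Six reclaim passes that each had to drop all the way to
	 * priority 0: pressure climbs towards DEF_PRIORITY << 10 (12288)
	 * without overshooting it.
	 */
	for (i = 0; i < 6; i++) {
		zone_adj_pressure(&z, 0);
		printf("after hard pass %d: pressure = %d\n", i + 1, z.pressure);
	}

	/* One pass that freed enough at the default priority decays it. */
	zone_adj_pressure(&z, DEF_PRIORITY);
	printf("after an easy pass: pressure = %d\n", z.pressure);
	return 0;
}

Each call moves the average halfway towards the value just measured, so a
handful of hard passes pushes pressure close to DEF_PRIORITY << 10, while a
single pass that succeeds at the default priority pulls it halfway back
down.  Note also that the failure paths added to try_to_free_pages() and
balance_pgdat() call zone_adj_pressure(zone, -1), which steers the average
towards (DEF_PRIORITY + 1) << 10, just above the documented maximum.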