From: Nick Piggin Move the watermark checking code into a single function. Extend it to account for the order of the allocation and the number of free pages that could satisfy such a request. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton --- 25-akpm/include/linux/mmzone.h | 2 + 25-akpm/mm/page_alloc.c | 58 +++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 19 deletions(-) diff -puN include/linux/mmzone.h~mm-higher-order-watermarks include/linux/mmzone.h --- 25/include/linux/mmzone.h~mm-higher-order-watermarks Wed Oct 27 15:45:38 2004 +++ 25-akpm/include/linux/mmzone.h Wed Oct 27 15:45:38 2004 @@ -280,6 +280,8 @@ void get_zone_counts(unsigned long *acti unsigned long *free); void build_all_zonelists(void); void wakeup_kswapd(struct zone *zone); +int zone_watermark_ok(struct zone *z, int order, unsigned long mark, + int alloc_type, int can_try_harder, int gfp_high); /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. diff -puN mm/page_alloc.c~mm-higher-order-watermarks mm/page_alloc.c --- 25/mm/page_alloc.c~mm-higher-order-watermarks Wed Oct 27 15:45:38 2004 +++ 25-akpm/mm/page_alloc.c Wed Oct 27 15:45:38 2004 @@ -586,6 +586,37 @@ buffered_rmqueue(struct zone *zone, int } /* + * Return 1 if free pages are above 'mark'. This takes into account the order + * of the allocation. 
+ */ +int zone_watermark_ok(struct zone *z, int order, unsigned long mark, + int alloc_type, int can_try_harder, int gfp_high) +{ + /* free_pages my go negative - that's OK */ + long min = mark, free_pages = z->free_pages - (1 << order) + 1; + int o; + + if (gfp_high) + min -= min / 2; + if (can_try_harder) + min -= min / 4; + + if (free_pages <= min + z->protection[alloc_type]) + return 0; + for (o = 0; o < order; o++) { + /* At the next order, this order's pages become unavailable */ + free_pages -= z->free_area[order].nr_free << o; + + /* Require fewer higher order pages to be free */ + min >>= 1; + + if (free_pages <= min) + return 0; + } + return 1; +} + +/* * This is the 'heart' of the zoned buddy allocator. * * Herein lies the mysterious "incremental min". That's the @@ -606,7 +637,6 @@ __alloc_pages(unsigned int gfp_mask, uns struct zonelist *zonelist) { const int wait = gfp_mask & __GFP_WAIT; - unsigned long min; struct zone **zones, *z; struct page *page; struct reclaim_state reclaim_state; @@ -636,9 +666,9 @@ __alloc_pages(unsigned int gfp_mask, uns /* Go through the zonelist once, looking for a zone with enough free */ for (i = 0; (z = zones[i]) != NULL; i++) { - min = z->pages_low + (1<protection[alloc_type]; - if (z->free_pages < min) + if (!zone_watermark_ok(z, order, z->pages_low, + alloc_type, 0, 0)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -654,14 +684,9 @@ __alloc_pages(unsigned int gfp_mask, uns * coming from realtime tasks to go deeper into reserves */ for (i = 0; (z = zones[i]) != NULL; i++) { - min = z->pages_min; - if (gfp_mask & __GFP_HIGH) - min /= 2; - if (can_try_harder) - min -= min / 4; - min += (1<protection[alloc_type]; - - if (z->free_pages < min) + if (!zone_watermark_ok(z, order, z->pages_min, + alloc_type, can_try_harder, + gfp_mask & __GFP_HIGH)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -697,14 +722,9 @@ rebalance: /* go through the zonelist yet one more time */ for (i = 0; (z = zones[i]) != 
NULL; i++) {
-		min = z->pages_min;
-		if (gfp_mask & __GFP_HIGH)
-			min /= 2;
-		if (can_try_harder)
-			min -= min / 4;
-		min += (1<<order) + z->protection[alloc_type];
-
-		if (z->free_pages < min)
+		if (!zone_watermark_ok(z, order, z->pages_min,
+				       alloc_type, can_try_harder,
+				       gfp_mask & __GFP_HIGH))
 			continue;
 
 		page = buffered_rmqueue(z, order, gfp_mask);
_