The compound page logic is fragile - it relies on additional metadata in
the pageframes which some other kernel code likes to stomp on (xfs was
doing this).

Also, because we're treating all higher-order pages as compound pages it is
no longer possible to free individual lower-order pages from the middle of
higher-order pages.  At least one ARM driver insists on doing this.

We only really need the compound page logic for higher-order pages which can
be mapped into user pagetables and placed under direct-io.  This covers
hugetlb pages and, conceivably, soundcard DMA buffers which were allocated
with a higher-order allocation but which weren't marked PageReserved.

The patch arranges for the hugetlb implementations to allocate their pages
with compound page metadata, and all other higher-order allocations go back
to the old way.
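To make the new contract concrete, here is a minimal caller-side sketch
(illustration only, not part of the patch; nid, order and the page variable
are assumed from surrounding code).  Compound metadata is now opt-in via
__GFP_COMP rather than implied by order > 0:

	/*
	 * hugetlb: these pages can be mapped into user pagetables and
	 * placed under direct-io, so ask the allocator for compound
	 * metadata (as the per-arch hunks below do):
	 */
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
				HUGETLB_PAGE_ORDER);

	/*
	 * Any other higher-order allocation omits __GFP_COMP and gets a
	 * plain higher-order page, so its constituent pages can again be
	 * freed individually, as they could before compound pages:
	 */
	page = alloc_pages(GFP_KERNEL, order);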
---

 25-akpm/arch/i386/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ia64/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ppc64/mm/hugetlbpage.c   |    3 ++-
 25-akpm/arch/sh/mm/hugetlbpage.c      |    3 ++-
 25-akpm/arch/sparc64/mm/hugetlbpage.c |    3 ++-
 25-akpm/include/linux/gfp.h           |    1 +
 25-akpm/include/linux/mm.h            |    4 ++--
 25-akpm/mm/page_alloc.c               |   22 +++++++++++-----------
 include/linux/page-flags.h            |    0
 9 files changed, 24 insertions(+), 18 deletions(-)

diff -puN include/linux/page-flags.h~use-compound-pages-for-hugetlb-only include/linux/page-flags.h
diff -puN mm/page_alloc.c~use-compound-pages-for-hugetlb-only mm/page_alloc.c
--- 25/mm/page_alloc.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.860918880 -0700
+++ 25-akpm/mm/page_alloc.c	2004-04-06 18:46:55.820807640 -0700
@@ -130,6 +130,9 @@ static void destroy_compound_page(struct
 	int i;
 	int nr_pages = 1 << order;
 
+	if (!PageCompound(page))
+		return;
+
 	if (page[1].index != order)
 		bad_page(__FUNCTION__, page);
 
@@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page
  * or two.
  */
-static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+static struct page *
+buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
+	int cold = !!(gfp_flags & __GFP_COLD);
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
@@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(str
 		BUG_ON(bad_range(zone, page));
 		mod_page_state_zone(zone, pgalloc, 1 << order);
 		prep_new_page(page, order);
-		if (order)
+		if (order && (gfp_flags & __GFP_COMP))
 			prep_compound_page(page, order);
 	}
 	return page;
@@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, uns
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int cold;
 	int alloc_type;
 	int do_retry;
 
 	might_sleep_if(wait);
 
-	cold = 0;
-	if (gfp_mask & __GFP_COLD)
-		cold = 1;
-
 	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 	if (zones[0] == NULL)     /* no zones in the zonelist */
 		return NULL;
@@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -620,7 +620,7 @@ rebalance:
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
 
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -648,7 +648,7 @@ rebalance:
 
 			if (z->free_pages >= min ||
 					(!wait && z->free_pages >= z->pages_high)) {
-				page = buffered_rmqueue(z, order, cold);
+				page = buffered_rmqueue(z, order, gfp_mask);
 				if (page)
 					goto got_pg;
 			}
diff -puN arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.877916296 -0700
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	2004-04-06 18:47:19.432218160 -0700
@@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ia64/mm/hugetlbpage.c
--- 25/arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.894913712 -0700
+++ 25-akpm/arch/ia64/mm/hugetlbpage.c	2004-04-06 18:47:29.318715184 -0700
@@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ppc64/mm/hugetlbpage.c
--- 25/arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.911911128 -0700
+++ 25-akpm/arch/ppc64/mm/hugetlbpage.c	2004-04-06 18:47:39.397183024 -0700
@@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_pag
 	static int nid = 0;
 	struct page *page;
 
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	if (!page)
 		return NULL;
diff -puN arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sh/mm/hugetlbpage.c
--- 25/arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.928908544 -0700
+++ 25-akpm/arch/sh/mm/hugetlbpage.c	2004-04-06 18:47:47.763911088 -0700
@@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sparc64/mm/hugetlbpage.c
--- 25/arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.945905960 -0700
+++ 25-akpm/arch/sparc64/mm/hugetlbpage.c	2004-04-06 18:47:57.778388656 -0700
@@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN include/linux/gfp.h~use-compound-pages-for-hugetlb-only include/linux/gfp.h
--- 25/include/linux/gfp.h~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:13.707573984 -0700
+++ 25-akpm/include/linux/gfp.h	2004-04-06 18:48:51.463227320 -0700
@@ -32,6 +32,7 @@
 #define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
+#define __GFP_COMP	0x4000	/* Add compound page metadata */
 
 #define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
diff -puN include/linux/mm.h~use-compound-pages-for-hugetlb-only include/linux/mm.h
--- 25/include/linux/mm.h~use-compound-pages-for-hugetlb-only	2004-04-06 18:49:06.323968144 -0700
+++ 25-akpm/include/linux/mm.h	2004-04-06 18:49:36.783337616 -0700
@@ -247,14 +247,14 @@ static inline int page_count(struct page
 
 static inline void get_page(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
		page = (struct page *)page->private;
 	atomic_inc(&page->count);
 }
 
 static inline void put_page(struct page *page)
 {
-	if (PageCompound(page)) {
+	if (unlikely(PageCompound(page))) {
 		page = (struct page *)page->private;
 		if (put_page_testzero(page)) {
 			if (page[1].mapping) {	/* destructor? */
_
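A note on the metadata these hunks rely on: prep_compound_page() is
unchanged by this patch, so it does not appear in the diff.  The sketch
below shows, roughly, the layout it sets up, consistent with the checks
visible in destroy_compound_page(), get_page() and put_page() above: the
head page's order lives in page[1].index, page[1].mapping is reserved for
an optional destructor (hugetlb uses this), and every constituent page's
->private points back at the head page.

	/* Sketch of the compound-page metadata layout assumed above */
	static void prep_compound_page(struct page *page, unsigned long order)
	{
		int i;
		int nr_pages = 1 << order;

		page[1].mapping = NULL;		/* destructor, if any, is
						 * installed later */
		page[1].index = order;		/* destroy_compound_page()
						 * sanity-checks this */
		for (i = 0; i < nr_pages; i++) {
			struct page *p = page + i;

			SetPageCompound(p);
			/* head page; get_page()/put_page() follow this */
			p->private = (unsigned long)page;
		}
	}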