The compound page logic is a little fragile - it relies on additional metadata in the pageframes which some other kernel code likes to stomp on (xfs was doing this). Also, because we're treating all higher-order pages as compound pages it is no longer possible to free individual lower-order pages from the middle of higher-order pages. At least one ARM driver insists on doing this. We only really need the compound page logic for higher-order pages which can be mapped into user pagetables and placed under direct-io. This covers hugetlb pages and, conceivably, soundcard DMA buffers which were allocated with a higher-order allocation but which weren't marked PageReserved. The patch arranges for the hugetlb implementations to allocate their pages with compound page metadata, and all other higher-order allocations go back to the old way. (Andrea supplied the GFP_LEVEL_MASK fix) --- 25-akpm/arch/i386/mm/hugetlbpage.c | 3 ++- 25-akpm/arch/ia64/mm/hugetlbpage.c | 3 ++- 25-akpm/arch/ppc64/mm/hugetlbpage.c | 3 ++- 25-akpm/arch/sh/mm/hugetlbpage.c | 3 ++- 25-akpm/arch/sparc64/mm/hugetlbpage.c | 3 ++- 25-akpm/include/linux/gfp.h | 6 ++++++ 25-akpm/include/linux/mm.h | 4 ++-- 25-akpm/include/linux/slab.h | 4 +--- 25-akpm/mm/page_alloc.c | 22 +++++++++++----------- include/linux/page-flags.h | 0 10 files changed, 30 insertions(+), 21 deletions(-) diff -puN include/linux/page-flags.h~use-compound-pages-for-hugetlb-only include/linux/page-flags.h diff -puN mm/page_alloc.c~use-compound-pages-for-hugetlb-only mm/page_alloc.c --- 25/mm/page_alloc.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/mm/page_alloc.c Thu Apr 8 14:43:06 2004 @@ -130,6 +130,9 @@ static void destroy_compound_page(struct int i; int nr_pages = 1 << order; + if (!PageCompound(page)) + return; + if (page[1].index != order) bad_page(__FUNCTION__, page); @@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page * or two. 
*/ -static struct page *buffered_rmqueue(struct zone *zone, int order, int cold) +static struct page * +buffered_rmqueue(struct zone *zone, int order, int gfp_flags) { unsigned long flags; struct page *page = NULL; + int cold = !!(gfp_flags & __GFP_COLD); if (order == 0) { struct per_cpu_pages *pcp; @@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(str BUG_ON(bad_range(zone, page)); mod_page_state_zone(zone, pgalloc, 1 << order); prep_new_page(page, order); - if (order) + if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); } return page; @@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, uns struct reclaim_state reclaim_state; struct task_struct *p = current; int i; - int cold; int alloc_type; int do_retry; might_sleep_if(wait); - cold = 0; - if (gfp_mask & __GFP_COLD) - cold = 1; - zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ if (zones[0] == NULL) /* no zones in the zonelist */ return NULL; @@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { - page = buffered_rmqueue(z, order, cold); + page = buffered_rmqueue(z, order, gfp_mask); if (page) goto got_pg; } @@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, uns if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { - page = buffered_rmqueue(z, order, cold); + page = buffered_rmqueue(z, order, gfp_mask); if (page) goto got_pg; } @@ -620,7 +620,7 @@ rebalance: for (i = 0; zones[i] != NULL; i++) { struct zone *z = zones[i]; - page = buffered_rmqueue(z, order, cold); + page = buffered_rmqueue(z, order, gfp_mask); if (page) goto got_pg; } @@ -648,7 +648,7 @@ rebalance: if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { - page = buffered_rmqueue(z, order, cold); + page = buffered_rmqueue(z, order, gfp_mask); if (page) goto got_pg; } diff -puN arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/i386/mm/hugetlbpage.c --- 
25/arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/arch/i386/mm/hugetlbpage.c Thu Apr 8 15:01:26 2004 @@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_pag { static int nid = 0; struct page *page; - page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER); + page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP, + HUGETLB_PAGE_ORDER); nid = (nid + 1) % numnodes; return page; } diff -puN arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ia64/mm/hugetlbpage.c --- 25/arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/arch/ia64/mm/hugetlbpage.c Thu Apr 8 15:01:26 2004 @@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_pag { static int nid = 0; struct page *page; - page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER); + page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP, + HUGETLB_PAGE_ORDER); nid = (nid + 1) % numnodes; return page; } diff -puN arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ppc64/mm/hugetlbpage.c --- 25/arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/arch/ppc64/mm/hugetlbpage.c Thu Apr 8 15:01:26 2004 @@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_pag static int nid = 0; struct page *page; - page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER); + page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP, + HUGETLB_PAGE_ORDER); if (!page) return NULL; diff -puN arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sh/mm/hugetlbpage.c --- 25/arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/arch/sh/mm/hugetlbpage.c Thu Apr 8 15:01:26 2004 @@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_pag { static int nid = 0; struct page *page; - page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER); + page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP, + 
HUGETLB_PAGE_ORDER); nid = (nid + 1) % numnodes; return page; } diff -puN arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sparc64/mm/hugetlbpage.c --- 25/arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/arch/sparc64/mm/hugetlbpage.c Thu Apr 8 15:01:34 2004 @@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_pag { static int nid = 0; struct page *page; - page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER); + page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP, + HUGETLB_PAGE_ORDER); nid = (nid + 1) % numnodes; return page; } diff -puN include/linux/gfp.h~use-compound-pages-for-hugetlb-only include/linux/gfp.h --- 25/include/linux/gfp.h~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/include/linux/gfp.h Thu Apr 8 15:01:40 2004 @@ -32,10 +32,16 @@ #define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */ #define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */ #define __GFP_NO_GROW 0x2000 /* Slab internal usage */ +#define __GFP_COMP 0x4000 /* Add compound page metadata */ #define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) +/* if you forget to add the bitmask here kernel will crash, period */ +#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ + __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ + __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP) + #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) diff -puN include/linux/mm.h~use-compound-pages-for-hugetlb-only include/linux/mm.h --- 25/include/linux/mm.h~use-compound-pages-for-hugetlb-only Thu Apr 8 14:28:32 2004 +++ 25-akpm/include/linux/mm.h Thu Apr 8 15:01:35 2004 @@ -247,14 +247,14 @@ static inline int page_count(struct page static inline void get_page(struct page *page) { - if (PageCompound(page)) + if (unlikely(PageCompound(page))) page = (struct page *)page->private; 
atomic_inc(&page->count); } static inline void put_page(struct page *page) { - if (PageCompound(page)) { + if (unlikely(PageCompound(page))) { page = (struct page *)page->private; if (put_page_testzero(page)) { if (page[1].mapping) { /* destructor? */ diff -puN include/linux/slab.h~use-compound-pages-for-hugetlb-only include/linux/slab.h --- 25/include/linux/slab.h~use-compound-pages-for-hugetlb-only Thu Apr 8 15:01:40 2004 +++ 25-akpm/include/linux/slab.h Thu Apr 8 15:01:40 2004 @@ -25,9 +25,7 @@ typedef struct kmem_cache_s kmem_cache_t #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA -#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|\ - __GFP_NOFAIL|__GFP_NORETRY) +#define SLAB_LEVEL_MASK GFP_LEVEL_MASK #define SLAB_NO_GROW __GFP_NO_GROW /* don't grow a cache */ _