From: Nick Piggin Mempools have 2 problems. The first is that mempool_alloc can get stuck in __alloc_pages when it should instead fail and take an element from the mempool's reserved pool. The second is that it will happily eat emergency PF_MEMALLOC reserves instead of going to the mempool's reserved pool. Fix the first by passing __GFP_NORETRY in the allocation calls in mempool_alloc. Fix the second by introducing a __GFP_NOMEMALLOC flag which directs the page allocator not to allocate from the emergency reserves. Signed-off-by: Andrew Morton --- include/linux/gfp.h | 6 ++++-- mm/mempool.c | 9 +++++++-- mm/page_alloc.c | 20 ++++++++++++-------- 3 files changed, 23 insertions(+), 12 deletions(-) diff -puN include/linux/gfp.h~mempool-nomemalloc-and-noretry include/linux/gfp.h --- 25/include/linux/gfp.h~mempool-nomemalloc-and-noretry 2005-04-26 05:14:01.000000000 -0700 +++ 25-akpm/include/linux/gfp.h 2005-04-26 05:14:01.000000000 -0700 @@ -38,14 +38,16 @@ struct vm_area_struct; #define __GFP_NO_GROW 0x2000u /* Slab internal usage */ #define __GFP_COMP 0x4000u /* Add compound page metadata */ #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ +#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ -#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) /* if you forget to add the bitmask here kernel will crash, period */ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ - __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP) + __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ + __GFP_NOMEMALLOC) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) diff -puN mm/mempool.c~mempool-nomemalloc-and-noretry mm/mempool.c --- 25/mm/mempool.c~mempool-nomemalloc-and-noretry 2005-04-26 05:14:01.000000000 -0700 +++ 25-akpm/mm/mempool.c 2005-04-26 05:14:01.000000000 -0700 @@ 
-198,11 +198,16 @@ void * mempool_alloc(mempool_t *pool, un void *element; unsigned long flags; DEFINE_WAIT(wait); - int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); + int gfp_nowait; + + gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ + gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ + gfp_mask |= __GFP_NOWARN; /* failures are OK */ + gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); might_sleep_if(gfp_mask & __GFP_WAIT); repeat_alloc: - element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data); + element = pool->alloc(gfp_nowait, pool->pool_data); if (likely(element != NULL)) return element; diff -puN mm/page_alloc.c~mempool-nomemalloc-and-noretry mm/page_alloc.c --- 25/mm/page_alloc.c~mempool-nomemalloc-and-noretry 2005-04-26 05:14:01.000000000 -0700 +++ 25-akpm/mm/page_alloc.c 2005-04-26 05:14:01.000000000 -0700 @@ -799,14 +799,18 @@ __alloc_pages(unsigned int __nocast gfp_ } /* This allocation should allow future memory freeing. */ - if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { - /* go through the zonelist yet again, ignoring mins */ - for (i = 0; (z = zones[i]) != NULL; i++) { - if (!cpuset_zone_allowed(z)) - continue; - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; + + if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) + && !in_interrupt()) { + if (!(gfp_mask & __GFP_NOMEMALLOC)) { + /* go through the zonelist yet again, ignoring mins */ + for (i = 0; (z = zones[i]) != NULL; i++) { + if (!cpuset_zone_allowed(z)) + continue; + page = buffered_rmqueue(z, order, gfp_mask); + if (page) + goto got_pg; + } } goto nopage; } _