This is a cleanup patch.

There are quite a lot of places in the kernel which will infinitely retry a
memory allocation.

Generally, they get it wrong. Some do yield(), the semantics of which have
changed over time. Some do schedule(), which can lock up if the caller is
SCHED_FIFO/RR. Some do schedule_timeout(), etc.

And often it is unnecessary, because the page allocator will do the retry
internally anyway. But we cannot rely on that - this behaviour may change
(-aa and -rmap kernels do not do this, for instance).

So it is good to formalise and to centralise this operation. If an
allocation specifies __GFP_REPEAT then the page allocator must infinitely
retry the allocation.


 include/linux/gfp.h  |    1 +
 include/linux/slab.h |    2 +-
 mm/page_alloc.c      |    7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff -puN include/linux/gfp.h~gfp_repeat include/linux/gfp.h
--- 25/include/linux/gfp.h~gfp_repeat	2003-04-10 21:45:42.000000000 -0700
+++ 25-akpm/include/linux/gfp.h	2003-04-10 21:46:09.000000000 -0700
@@ -18,6 +18,7 @@
 #define __GFP_FS 0x80 /* Can call down to low-level FS? */
 #define __GFP_COLD 0x100 /* Cache-cold page required */
 #define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */
+#define __GFP_REPEAT 0x400 /* Retry the allocation */
 
 #define GFP_ATOMIC (__GFP_HIGH)
 #define GFP_NOIO (__GFP_WAIT)
diff -puN include/linux/slab.h~gfp_repeat include/linux/slab.h
--- 25/include/linux/slab.h~gfp_repeat	2003-04-10 21:46:49.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-04-10 21:47:08.000000000 -0700
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t
 #define SLAB_KERNEL GFP_KERNEL
 #define SLAB_DMA GFP_DMA
 
-#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT)
 #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().
diff -puN mm/page_alloc.c~gfp_repeat mm/page_alloc.c
--- 25/mm/page_alloc.c~gfp_repeat	2003-04-10 21:47:03.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2003-04-10 21:54:44.000000000 -0700
@@ -633,10 +633,11 @@ rebalance:
 	}
 
 	/*
-	 * Don't let big-order allocations loop. Yield for kswapd, try again.
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that. Wait for some write requests to complete then retry.
 	 */
-	if (order <= 3) {
-		yield();
+	if ((order <= 3) || (gfp_mask & __GFP_REPEAT)) {
+		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}
 
_