This is a cleanup patch.  There are quite a lot of places in the kernel
which will infinitely retry a memory allocation.  Generally, they get it
wrong.  Some do yield(), the semantics of which have changed over time.
Some do schedule(), which can lock up if the caller is SCHED_FIFO/RR.
Some do schedule_timeout(), etc.

And often it is unnecessary, because the page allocator will do the
retry internally anyway.  But we cannot rely on that - this behaviour
may change (-aa and -rmap kernels do not do this, for instance).

So it is good to formalise and to centralise this operation.  If an
allocation specifies __GFP_REPEAT then the page allocator must
infinitely retry the allocation.


 25-akpm/include/linux/gfp.h  |   15 ++++++++++++++-
 25-akpm/include/linux/slab.h |    2 +-
 25-akpm/mm/page_alloc.c      |   18 +++++++++++++++---
 25-akpm/mm/vmscan.c          |    5 ++---
 4 files changed, 32 insertions(+), 8 deletions(-)

diff -puN include/linux/gfp.h~gfp_repeat include/linux/gfp.h
--- 25/include/linux/gfp.h~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/include/linux/gfp.h	Thu Apr 17 16:02:13 2003
@@ -11,13 +11,26 @@
 #define __GFP_DMA	0x01
 #define __GFP_HIGHMEM	0x02
 
-/* Action modifiers - doesn't change the zoning */
+/*
+ * Action modifiers - doesn't change the zoning
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ * _might_ fail.  This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ */
 #define __GFP_WAIT	0x10	/* Can wait and reschedule? */
 #define __GFP_HIGH	0x20	/* Should access emergency pools? */
 #define __GFP_IO	0x40	/* Can start physical IO? */
 #define __GFP_FS	0x80	/* Can call down to low-level FS? */
 #define __GFP_COLD	0x100	/* Cache-cold page required */
 #define __GFP_NOWARN	0x200	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	0x400	/* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)

diff -puN include/linux/slab.h~gfp_repeat include/linux/slab.h
--- 25/include/linux/slab.h~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/include/linux/slab.h	Thu Apr 17 16:02:13 2003
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY)
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().

diff -puN mm/page_alloc.c~gfp_repeat mm/page_alloc.c
--- 25/mm/page_alloc.c~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/mm/page_alloc.c	Thu Apr 17 16:02:13 2003
@@ -583,6 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 	struct page *page;
 	int i;
 	int cold;
+	int do_retry;
 
 	if (wait)
 		might_sleep();
@@ -678,10 +679,21 @@ rebalance:
 	}
 
 	/*
-	 * Don't let big-order allocations loop.  Yield for kswapd, try again.
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that.  Wait for some write requests to complete then retry.
+	 *
+	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+	 * may not be true in other implementations.
 	 */
-	if (order <= 3) {
-		yield();
+	do_retry = 0;
+	if (!(gfp_mask & __GFP_NORETRY)) {
+		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+			do_retry = 1;
+		if (gfp_mask & __GFP_NOFAIL)
+			do_retry = 1;
+	}
+	if (do_retry) {
+		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}

diff -puN mm/vmscan.c~gfp_repeat mm/vmscan.c
--- 25/mm/vmscan.c~gfp_repeat	Thu Apr 17 16:15:50 2003
+++ 25-akpm/mm/vmscan.c	Thu Apr 17 16:36:52 2003
@@ -805,8 +805,7 @@ shrink_caches(struct zone *classzone, in
  * excessive rotation of the inactive list, which is _supposed_ to be an LRU,
  * yes?
  */
-int
-try_to_free_pages(struct zone *classzone,
+int try_to_free_pages(struct zone *classzone,
 		unsigned int gfp_mask, unsigned int order)
 {
 	int priority;
@@ -838,7 +837,7 @@ try_to_free_pages(struct zone *classzone
 		blk_congestion_wait(WRITE, HZ/10);
 		shrink_slab(total_scanned, gfp_mask);
 	}
-	if (gfp_mask & __GFP_FS)
+	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
 	return 0;
 }
_
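
For illustration, a minimal sketch of how callers are expected to pick
between the three new modifiers.  None of this is part of the patch:
the function, the object size and the fallback path are hypothetical;
only the flags and the allocator entry points (kmalloc, alloc_pages,
alloc_page) are real.

	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/slab.h>

	/* Hypothetical callers - these do not exist in the tree */
	static void gfp_modifier_examples(void)
	{
		void *obj;
		struct page *big, *page;

		/*
		 * This caller cannot handle failure (think of a journal
		 * commit path), so the allocator must loop for ever.
		 * No NULL check is needed.
		 */
		obj = kmalloc(128, GFP_NOFS | __GFP_NOFAIL);

		/*
		 * Try hard for an order-4 block, but keep a fallback.
		 * The strength of __GFP_REPEAT is implementation-defined,
		 * so the return value must still be checked.
		 */
		big = alloc_pages(GFP_KERNEL | __GFP_REPEAT, 4);
		if (big == NULL)
			big = alloc_pages(GFP_KERNEL, 0); /* degrade to one page */

		/*
		 * Opportunistic allocation (readahead, say): fail fast
		 * rather than wait on writeback, and suppress the page
		 * allocation failure warning.
		 */
		page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

		/* real code would use and then free obj, big and page */
	}

The point of __GFP_NOFAIL is that it records at the call site that the
caller has no failure path - exactly the information which the old
open-coded retry loops were hiding from the allocator.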