This is a cleanup patch.  There are quite a lot of places in the kernel
which will infinitely retry a memory allocation.  Generally, they get it
wrong.  Some do yield(), the semantics of which have changed over time.
Some do schedule(), which can lock up if the caller is SCHED_FIFO/RR.
Some do schedule_timeout(), etc.

And often it is unnecessary, because the page allocator will do the
retry internally anyway.  But we cannot rely on that - this behaviour
may change (-aa and -rmap kernels do not do this, for instance).

So it is good to formalise and to centralise this operation.  If an
allocation specifies __GFP_REPEAT then the page allocator must
infinitely retry the allocation.


 25-akpm/include/linux/gfp.h  |   15 ++++++++++++++-
 25-akpm/include/linux/slab.h |    2 +-
 25-akpm/mm/page_alloc.c      |   18 +++++++++++++++---
 25-akpm/mm/vmscan.c          |    5 ++---
 4 files changed, 32 insertions(+), 8 deletions(-)

diff -puN include/linux/gfp.h~gfp_repeat include/linux/gfp.h
--- 25/include/linux/gfp.h~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/include/linux/gfp.h	Thu Apr 17 16:02:13 2003
@@ -11,13 +11,26 @@
 #define __GFP_DMA	0x01
 #define __GFP_HIGHMEM	0x02
 
-/* Action modifiers - doesn't change the zoning */
+/*
+ * Action modifiers - doesn't change the zoning
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ * _might_ fail.  This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ */
 #define __GFP_WAIT	0x10	/* Can wait and reschedule? */
 #define __GFP_HIGH	0x20	/* Should access emergency pools? */
 #define __GFP_IO	0x40	/* Can start physical IO? */
 #define __GFP_FS	0x80	/* Can call down to low-level FS? */
 #define __GFP_COLD	0x100	/* Cache-cold page required */
 #define __GFP_NOWARN	0x200	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	0x400	/* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)

diff -puN include/linux/slab.h~gfp_repeat include/linux/slab.h
--- 25/include/linux/slab.h~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/include/linux/slab.h	Thu Apr 17 16:02:13 2003
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY)
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().

diff -puN mm/page_alloc.c~gfp_repeat mm/page_alloc.c
--- 25/mm/page_alloc.c~gfp_repeat	Thu Apr 17 16:02:13 2003
+++ 25-akpm/mm/page_alloc.c	Thu Apr 17 16:02:13 2003
@@ -583,6 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 	struct page *page;
 	int i;
 	int cold;
+	int do_retry;
 
 	if (wait)
 		might_sleep();
@@ -678,10 +679,21 @@ rebalance:
 	}
 
 	/*
-	 * Don't let big-order allocations loop.  Yield for kswapd, try again.
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that.  Wait for some write requests to complete then retry.
+	 *
+	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+	 * may not be true in other implementations.
 	 */
-	if (order <= 3) {
-		yield();
+	do_retry = 0;
+	if (!(gfp_mask & __GFP_NORETRY)) {
+		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+			do_retry = 1;
+		if (gfp_mask & __GFP_NOFAIL)
+			do_retry = 1;
+	}
+	if (do_retry) {
+		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}

diff -puN mm/vmscan.c~gfp_repeat mm/vmscan.c
--- 25/mm/vmscan.c~gfp_repeat	Thu Apr 17 16:15:50 2003
+++ 25-akpm/mm/vmscan.c	Thu Apr 17 16:36:52 2003
@@ -805,8 +805,7 @@ shrink_caches(struct zone *classzone, in
  * excessive rotation of the inactive list, which is _supposed_ to be an LRU,
  * yes?
  */
-int
-try_to_free_pages(struct zone *classzone,
+int try_to_free_pages(struct zone *classzone,
 		unsigned int gfp_mask, unsigned int order)
 {
 	int priority;
@@ -838,7 +837,7 @@ try_to_free_pages(struct zone *classzone
 		blk_congestion_wait(WRITE, HZ/10);
 		shrink_slab(total_scanned, gfp_mask);
 	}
-	if (gfp_mask & __GFP_FS)
+	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
 	return 0;
 }
_
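
For illustration, a minimal sketch of how callers are expected to pick
between the three new modifiers.  None of this is part of the patch:
the function, the object size and the fallback path are hypothetical;
only the flags and the allocator entry points (kmalloc, alloc_pages,
alloc_page) are real.

	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/slab.h>

	/* Hypothetical callers - these do not exist in the tree */
	static void gfp_modifier_examples(void)
	{
		void *obj;
		struct page *big, *page;

		/*
		 * This caller cannot handle failure (think of a journal
		 * commit path), so the allocator must loop for ever.
		 * No NULL check is needed.
		 */
		obj = kmalloc(128, GFP_NOFS | __GFP_NOFAIL);

		/*
		 * Try hard for an order-4 block, but keep a fallback.
		 * The strength of __GFP_REPEAT is implementation-defined,
		 * so the return value must still be checked.
		 */
		big = alloc_pages(GFP_KERNEL | __GFP_REPEAT, 4);
		if (big == NULL)
			big = alloc_pages(GFP_KERNEL, 0); /* degrade to one page */

		/*
		 * Opportunistic allocation (readahead, say): fail fast
		 * rather than wait on writeback, and suppress the page
		 * allocation failure warning.
		 */
		page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

		/* real code would use and then free obj, big and page */
	}

The point of __GFP_NOFAIL is that it records at the call site that the
caller has no failure path - exactly the information which the old
open-coded retry loops were hiding from the allocator.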