diff -urN vm-ref/arch/i386/config.in vm/arch/i386/config.in --- vm-ref/arch/i386/config.in Fri Oct 19 05:19:12 2001 +++ vm/arch/i386/config.in Fri Oct 19 05:19:29 2001 @@ -402,6 +402,7 @@ bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Debug allocation faliures' CONFIG_DEBUG_GFP fi endmenu diff -urN vm-ref/fs/buffer.c vm/fs/buffer.c --- vm-ref/fs/buffer.c Fri Oct 19 05:19:13 2001 +++ vm/fs/buffer.c Fri Oct 19 05:19:29 2001 @@ -115,7 +115,7 @@ int dummy5; /* unused */ } b_un; unsigned int data[N_PARAM]; -} bdf_prm = {{30, 64, 64, 256, 5*HZ, 30*HZ, 60, 0, 0}}; +} bdf_prm = {{40, 64, 64, 256, 5*HZ, 30*HZ, 60, 0, 0}}; /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 0, 0, 0}; @@ -2396,11 +2396,8 @@ ll_rw_block(WRITE, 1, &p); tryagain = 0; } else if (buffer_locked(p)) { - if (gfp_mask & __GFP_WAIT) { - wait_on_buffer(p); - tryagain = 1; - } else - tryagain = 0; + wait_on_buffer(p); + tryagain = 1; } } else tryagain = 0; diff -urN vm-ref/include/linux/mmzone.h vm/include/linux/mmzone.h --- vm-ref/include/linux/mmzone.h Mon Oct 8 04:28:58 2001 +++ vm/include/linux/mmzone.h Fri Oct 19 05:19:29 2001 @@ -41,6 +41,7 @@ unsigned long free_pages; unsigned long pages_min, pages_low, pages_high; int need_balance; + int nr_active_pages, nr_inactive_pages; /* * free areas of different sizes diff -urN vm-ref/include/linux/sched.h vm/include/linux/sched.h --- vm-ref/include/linux/sched.h Fri Oct 19 05:19:13 2001 +++ vm/include/linux/sched.h Fri Oct 19 05:19:29 2001 @@ -280,6 +280,14 @@ extern struct user_struct root_user; #define INIT_USER (&root_user) +struct zone_struct; + +struct local_pages { + struct list_head list; + unsigned int order, nr; + struct zone_struct * classzone; +}; + struct task_struct { /* * offsets of these are hardcoded elsewhere - touch with care @@ -318,8 +326,7 @@ struct task_struct *next_task, *prev_task; struct mm_struct *active_mm; struct rw_sem_recursor mm_recursor; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; + struct local_pages local_pages; /* task state */ struct linux_binfmt *binfmt; diff -urN vm-ref/include/linux/swap.h vm/include/linux/swap.h --- vm-ref/include/linux/swap.h Fri Oct 19 05:19:13 2001 +++ vm/include/linux/swap.h Fri Oct 19 05:19:29 2001 @@ -105,15 +105,14 @@ extern void FASTCALL(lru_cache_del(struct page *)); extern void FASTCALL(deactivate_page(struct page *)); -extern void FASTCALL(deactivate_page_nolock(struct page *)); extern void FASTCALL(activate_page(struct page *)); -extern void FASTCALL(activate_page_nolock(struct page *)); extern void swap_setup(void); /* linux/mm/vmscan.c */ extern wait_queue_head_t kswapd_wait; -extern int FASTCALL(try_to_free_pages(unsigned int, unsigned int)); +extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int)); +extern int vm_scan_ratio, vm_mapped_ratio, vm_balance_ratio; /* linux/mm/page_io.c */ extern void rw_swap_page(int, struct page *); @@ -134,7 +133,6 @@ extern struct page * read_swap_cache_async(swp_entry_t); /* linux/mm/oom_kill.c */ -extern int out_of_memory(void); extern void oom_kill(void); /* linux/mm/swapfile.c */ @@ -177,27 +175,91 @@ BUG(); \ } while (0) +#define inc_nr_active_pages(page) \ +do { \ + pg_data_t * __pgdat; \ + zone_t * __classzone, * __overflow; \ + \ + __classzone = (page)->zone; \ + __pgdat = __classzone->zone_pgdat; \ + __overflow 
= __pgdat->node_zones + __pgdat->nr_zones; \ + \ + while (__classzone < __overflow) { \ + __classzone->nr_active_pages++; \ + __classzone++; \ + } \ + nr_active_pages++; \ +} while (0) + +#define dec_nr_active_pages(page) \ +do { \ + pg_data_t * __pgdat; \ + zone_t * __classzone, * __overflow; \ + \ + __classzone = (page)->zone; \ + __pgdat = __classzone->zone_pgdat; \ + __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ + \ + while (__classzone < __overflow) { \ + __classzone->nr_active_pages--; \ + __classzone++; \ + } \ + nr_active_pages--; \ +} while (0) + +#define inc_nr_inactive_pages(page) \ +do { \ + pg_data_t * __pgdat; \ + zone_t * __classzone, * __overflow; \ + \ + __classzone = (page)->zone; \ + __pgdat = __classzone->zone_pgdat; \ + __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ + \ + while (__classzone < __overflow) { \ + __classzone->nr_inactive_pages++; \ + __classzone++; \ + } \ + nr_inactive_pages++; \ +} while (0) + +#define dec_nr_inactive_pages(page) \ +do { \ + pg_data_t * __pgdat; \ + zone_t * __classzone, * __overflow; \ + \ + __classzone = (page)->zone; \ + __pgdat = __classzone->zone_pgdat; \ + __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ + \ + while (__classzone < __overflow) { \ + __classzone->nr_inactive_pages--; \ + __classzone++; \ + } \ + nr_inactive_pages--; \ +} while (0) + #define add_page_to_active_list(page) \ do { \ DEBUG_LRU_PAGE(page); \ SetPageActive(page); \ list_add(&(page)->lru, &active_list); \ - nr_active_pages++; \ + inc_nr_active_pages(page); \ } while (0) #define add_page_to_inactive_list(page) \ do { \ DEBUG_LRU_PAGE(page); \ - SetPageInactive(page); \ + SetPageInactive(page); \ list_add(&(page)->lru, &inactive_list); \ - nr_inactive_pages++; \ + inc_nr_inactive_pages(page); \ } while (0) #define del_page_from_active_list(page) \ do { \ list_del(&(page)->lru); \ ClearPageActive(page); \ - nr_active_pages--; \ + dec_nr_active_pages(page); \ DEBUG_LRU_PAGE(page); \ } while (0) @@ -205,7 +267,7 @@ do { \ list_del(&(page)->lru); \ ClearPageInactive(page); \ - nr_inactive_pages--; \ + dec_nr_inactive_pages(page); \ DEBUG_LRU_PAGE(page); \ } while (0) diff -urN vm-ref/include/linux/sysctl.h vm/include/linux/sysctl.h --- vm-ref/include/linux/sysctl.h Fri Oct 19 05:19:13 2001 +++ vm/include/linux/sysctl.h Fri Oct 19 05:19:29 2001 @@ -134,12 +134,13 @@ VM_FREEPG=3, /* struct: Set free page thresholds */ VM_BDFLUSH=4, /* struct: Control buffer cache flushing */ VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ - VM_BUFFERMEM=6, /* struct: Set buffer memory thresholds */ - VM_PAGECACHE=7, /* struct: Set cache memory thresholds */ VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */ + VM_SCAN_RATIO=12, /* part of the inactive list to scan */ + VM_MAPPED_RATIO=13, /* detect when it's time to start paging */ + VM_BALANCE_RATIO=14, /* balance active and inactive caches */ }; diff -urN vm-ref/kernel/fork.c vm/kernel/fork.c --- vm-ref/kernel/fork.c Fri Oct 19 05:19:10 2001 +++ vm/kernel/fork.c Fri Oct 19 05:19:29 2001 @@ -645,7 +645,7 @@ p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; - INIT_LIST_HEAD(&p->local_pages); + INIT_LIST_HEAD(&p->local_pages.list); retval = -ENOMEM; /* copy all the process information */ diff -urN vm-ref/kernel/sysctl.c vm/kernel/sysctl.c --- vm-ref/kernel/sysctl.c Fri Oct 
19 05:19:13 2001 +++ vm/kernel/sysctl.c Fri Oct 19 05:19:29 2001 @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -259,6 +260,12 @@ }; static ctl_table vm_table[] = { + {VM_SCAN_RATIO, "vm_scan_ratio", + &vm_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAPPED_RATIO, "vm_mapped_ratio", + &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_BALANCE_RATIO, "vm_balance_ratio", + &vm_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &bdflush_min, &bdflush_max}, diff -urN vm-ref/mm/filemap.c vm/mm/filemap.c --- vm-ref/mm/filemap.c Fri Oct 19 05:19:13 2001 +++ vm/mm/filemap.c Fri Oct 19 05:19:29 2001 @@ -2919,8 +2919,15 @@ } unlock: kunmap(page); + + /* + * Mark the page accessed if we wrote the + * beginning or we just did an lseek. + */ + if (!offset || !file->f_reada) + mark_page_accessed(page); + /* Mark it unlocked again and drop the page.. */ - SetPageReferenced(page); UnlockPage(page); page_cache_release(page); diff -urN vm-ref/mm/highmem.c vm/mm/highmem.c --- vm-ref/mm/highmem.c Fri Oct 19 05:19:13 2001 +++ vm/mm/highmem.c Fri Oct 19 05:19:29 2001 @@ -328,7 +328,6 @@ struct list_head *tmp; struct page *page; -repeat_alloc: page = alloc_page(GFP_NOHIGHIO); if (page) return page; @@ -338,6 +337,7 @@ */ wakeup_bdflush(); +repeat_alloc: /* * Try to allocate from the emergency pool. */ @@ -366,7 +366,6 @@ struct list_head *tmp; struct buffer_head *bh; -repeat_alloc: bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); if (bh) return bh; @@ -376,6 +375,7 @@ */ wakeup_bdflush(); +repeat_alloc: /* * Try to allocate from the emergency pool. */ diff -urN vm-ref/mm/memory.c vm/mm/memory.c --- vm-ref/mm/memory.c Fri Oct 19 05:19:13 2001 +++ vm/mm/memory.c Fri Oct 19 05:19:29 2001 @@ -1105,10 +1105,6 @@ return; } -/* Swap 80% full? Release the pages as they are paged in.. */ -#define vm_swap_full() \ - (swapper_space.nrpages*5 > total_swap_pages*4) - /* * We hold the mm semaphore and the page_table_lock on entry and exit. */ @@ -1164,12 +1160,10 @@ swap_free(entry); mark_page_accessed(page); if (exclusive_swap_page(page)) { - if (write_access || vm_swap_full()) { - pte = pte_mkdirty(pte); - if (vma->vm_flags & VM_WRITE) - pte = pte_mkwrite(pte); - delete_from_swap_cache(page); - } + if (vma->vm_flags & VM_WRITE) + pte = pte_mkwrite(pte); + pte = pte_mkdirty(pte); + delete_from_swap_cache(page); } UnlockPage(page); diff -urN vm-ref/mm/oom_kill.c vm/mm/oom_kill.c --- vm-ref/mm/oom_kill.c Wed Oct 10 02:16:27 2001 +++ vm/mm/oom_kill.c Fri Oct 19 05:19:29 2001 @@ -192,67 +192,3 @@ schedule(); return; } - -static inline int node_zones_low(pg_data_t *pgdat) -{ - zone_t * zone; - int i; - - for (i = pgdat->nr_zones-1; i >= 0; i--) { - zone = pgdat->node_zones + i; - - if (zone->free_pages > (zone->pages_low)) - return 0; - - } - return 1; -} - -static int all_zones_low(void) -{ - pg_data_t * pgdat = pgdat_list; - - pgdat = pgdat_list; - do { - if (node_zones_low(pgdat)) - continue; - return 0; - } while ((pgdat = pgdat->node_next)); - - return 1; -} - -/** - * out_of_memory - is the system out of memory? - * - * Returns 0 if there is still enough memory left, - * 1 when we are out of memory (otherwise). - */ -int out_of_memory(void) -{ - long cache_mem, limit; - - /* Enough free memory? Not OOM. */ - if (!all_zones_low()) - return 0; - - /* Enough swap space left? Not OOM. 
*/ - if (nr_swap_pages > 0) - return 0; - - /* - * If the buffer and page cache (including swap cache) are over - * their (/proc tunable) minimum, we're still not OOM. We test - * this to make sure we don't return OOM when the system simply - * has a hard time with the cache. - */ - cache_mem = atomic_read(&page_cache_size); - limit = 2; - limit *= num_physpages / 100; - - if (cache_mem > limit) - return 0; - - /* Else... */ - return 1; -} diff -urN vm-ref/mm/page_alloc.c vm/mm/page_alloc.c --- vm-ref/mm/page_alloc.c Fri Oct 19 05:19:13 2001 +++ vm/mm/page_alloc.c Fri Oct 19 05:19:31 2001 @@ -146,12 +146,13 @@ * local since we must deal with fragmentation too and we * can't rely on the nr_local_pages information. */ - if (current->nr_local_pages && !current->allocation_order) + if ((current->local_pages.nr && !current->local_pages.order) || + !memclass(page->zone, current->local_pages.classzone)) goto back_local_freelist; - list_add(&page->list, ¤t->local_pages); + list_add(&page->list, ¤t->local_pages.list); page->index = order; - current->nr_local_pages++; + current->local_pages.nr++; } #define MARK_USED(index, order, area) \ @@ -233,35 +234,36 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) { struct page * page = NULL; - int __freed = 0; + int __freed; - if (!(gfp_mask & __GFP_WAIT)) - goto out; if (in_interrupt()) BUG(); - current->allocation_order = order; + current->local_pages.order = order; + current->local_pages.classzone = classzone; current->flags |= PF_MEMALLOC | PF_FREE_PAGES; - __freed = try_to_free_pages(gfp_mask, order); + __freed = try_to_free_pages(classzone, gfp_mask, order); current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES); - if (current->nr_local_pages) { + if (current->local_pages.nr) { struct list_head * entry, * local_pages; struct page * tmp; int nr_pages; - local_pages = ¤t->local_pages; + local_pages = ¤t->local_pages.list; if (likely(__freed)) { /* pick from the last inserted so we're lifo */ entry = local_pages->next; do { tmp = list_entry(entry, struct page, list); - if (tmp->index == order && memclass(tmp->zone, classzone)) { + if (!memclass(tmp->zone, classzone)) + BUG(); + if (tmp->index == order) { list_del(entry); - current->nr_local_pages--; + current->local_pages.nr--; set_page_count(tmp, 1); page = tmp; @@ -289,7 +291,7 @@ } while ((entry = entry->next) != local_pages); } - nr_pages = current->nr_local_pages; + nr_pages = current->local_pages.nr; /* free in reverse order so that the global order will be lifo */ while ((entry = local_pages->prev) != local_pages) { list_del(entry); @@ -298,9 +300,8 @@ if (!nr_pages--) BUG(); } - current->nr_local_pages = 0; + current->local_pages.nr = 0; } - out: *freed = __freed; return page; } @@ -358,8 +359,7 @@ /* here we're in the low on memory slow path */ -rebalance: - if (current->flags & PF_MEMALLOC) { + if (current->flags & PF_MEMALLOC && !in_interrupt()) { zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); @@ -375,34 +375,52 @@ /* Atomic allocations - we can't balance anything */ if (!(gfp_mask & __GFP_WAIT)) - return NULL; + goto out; + rebalance: page = balance_classzone(classzone, gfp_mask, order, &freed); if (page) return page; zone = zonelist->zones; - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; + if (likely(freed)) { + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; - if (zone_free_pages(z, order) > z->pages_min) { - page = rmqueue(z, order); - if (page) - return page; + if (zone_free_pages(z, order) > 
z->pages_min) { + page = rmqueue(z, order); + if (page) + return page; + } } - } + goto rebalance; + } else { + /* + * Check that no other task is been killed meanwhile, + * in such a case we can succeed the allocation. + */ + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; - /* Don't let big-order allocations loop */ - if (order > 1) - return NULL; + if (zone_free_pages(z, order) > z->pages_high) { + page = rmqueue(z, order); + if (page) + return page; + } + } + } - /* Yield for kswapd, and try again */ - current->policy |= SCHED_YIELD; - __set_current_state(TASK_RUNNING); - schedule(); - goto rebalance; + out: + printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n", + order, gfp_mask, !!(current->flags & PF_MEMALLOC)); +#ifdef CONFIG_DEBUG_GFP + show_stack(NULL); +#endif + return NULL; } /* @@ -523,13 +541,20 @@ zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); zonep = zonelist->zones; - for (zone = *zonep++; zone; zone = *zonep++) - sum += zone->free_pages; + zone = *zonep; + if (zone) { + sum += zone->nr_inactive_pages + zone->nr_active_pages; + do { + sum += zone->free_pages; + zonep++; + zone = *zonep; + } while (zone); + } pgdat = pgdat->node_next; } while (pgdat); - return sum + nr_active_pages + nr_inactive_pages; + return sum; } #if CONFIG_HIGHMEM @@ -597,25 +622,24 @@ zone_t *zone; for (zone = tmpdat->node_zones; zone < tmpdat->node_zones + MAX_NR_ZONES; zone++) - printk("Zone:%s freepages:%6lukB min:%6luKB low:%6lukB " - "high:%6lukB\n", - zone->name, - (zone->free_pages) - << ((PAGE_SHIFT-10)), - zone->pages_min - << ((PAGE_SHIFT-10)), - zone->pages_low - << ((PAGE_SHIFT-10)), - zone->pages_high - << ((PAGE_SHIFT-10))); - + printk("Zone:%s freepages:%6lukB|%lu min:%6luKB|%lu low:%6lukB|%lu high:%6lukB:%lu active:%6dkB|%d inactive:%6dkB|%d\n", + zone->name, + zone->free_pages << (PAGE_SHIFT-10), + zone->free_pages, + zone->pages_min << (PAGE_SHIFT-10), + zone->pages_min, + zone->pages_low << (PAGE_SHIFT-10), + zone->pages_low, + zone->pages_high << (PAGE_SHIFT-10), + zone->pages_high, + zone->nr_active_pages << (PAGE_SHIFT-10), + zone->nr_active_pages, + zone->nr_inactive_pages << (PAGE_SHIFT-10), + zone->nr_inactive_pages); + tmpdat = tmpdat->node_next; } - printk("Free pages: %6dkB (%6dkB HighMem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - nr_free_highpages() << (PAGE_SHIFT-10)); - printk("( Active: %d, inactive: %d, free: %d )\n", nr_active_pages, nr_inactive_pages, @@ -790,6 +814,7 @@ zone->zone_pgdat = pgdat; zone->free_pages = 0; zone->need_balance = 0; + zone->nr_active_pages = zone->nr_inactive_pages = 0; if (!size) continue; diff -urN vm-ref/mm/shmem.c vm/mm/shmem.c --- vm-ref/mm/shmem.c Fri Oct 19 05:00:08 2001 +++ vm/mm/shmem.c Fri Oct 19 05:19:29 2001 @@ -557,7 +557,7 @@ swap_free(*entry); *entry = (swp_entry_t) {0}; delete_from_swap_cache(page); - flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1)); + flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1); page->flags = flags | (1 << PG_dirty); add_to_page_cache_locked(page, mapping, idx); info->swapped--; diff -urN vm-ref/mm/swap.c vm/mm/swap.c --- vm-ref/mm/swap.c Fri Oct 19 05:00:08 2001 +++ vm/mm/swap.c Fri Oct 19 05:19:29 2001 @@ -48,7 +48,7 @@ * called on a page which is not on any of the lists, the * page is left alone. 
*/ -void deactivate_page_nolock(struct page * page) +static inline void deactivate_page_nolock(struct page * page) { if (PageActive(page)) { del_page_from_active_list(page); @@ -66,7 +66,7 @@ /* * Move an inactive page to the active list. */ -void activate_page_nolock(struct page * page) +static inline void activate_page_nolock(struct page * page) { if (PageInactive(page)) { del_page_from_inactive_list(page); diff -urN vm-ref/mm/vmscan.c vm/mm/vmscan.c --- vm-ref/mm/vmscan.c Fri Oct 19 05:00:08 2001 +++ vm/mm/vmscan.c Fri Oct 19 05:19:29 2001 @@ -26,14 +26,29 @@ #include /* - * The "priority" of VM scanning is how much of the queues we - * will scan in one go. A value of 6 for DEF_PRIORITY implies - * that we'll scan 1/64th of the queues ("queue_length >> 6") - * during a normal aging round. + * The "vm_scan_ratio" is how much of the queues we will scan + * in one go. A value of 6 for vm_scan_ratio implies that we'll + * scan 1/6 of the inactive list during a normal aging round. */ -#define DEF_PRIORITY (6) +int vm_scan_ratio = 16; -#define page_zone_plenty(page) ((page)->zone->free_pages > (page)->zone->pages_high) +/* + * The "vm_mapped_ratio" controls when to start early-paging, we probe + * the inactive list during shrink_cache() and if there are too many + * mapped unfreeable pages we have an indication that we'd better + * start paging. The bigger vm_mapped_ratio is, the eaerlier the + * machine will run into swapping activities. + */ +int vm_mapped_ratio = 8; + +/* + * The "vm_balance_ratio" controls the balance between active and + * inactive cache. The bigger vm_balance_ratio is, the easier the + * active cache will grow, because we'll rotate the active list + * slowly. A value of 4 means we'll go towards a balance of + * 1/5 of the cache being inactive. + */ +int vm_balance_ratio = 32; /* * The swap-out function returns 1 if it successfully @@ -45,7 +60,7 @@ */ /* mm->page_table_lock is held. mmap_sem is not held */ -static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page) +static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone) { pte_t pte; swp_entry_t entry; @@ -53,11 +68,14 @@ /* Don't look at this pte if it's been accessed recently. */ if (ptep_test_and_clear_young(page_table)) { flush_tlb_page(vma, address); + mark_page_accessed(page); return 0; } - /* Don't bother replenishing zones that have tons of memory */ - if (page_zone_plenty(page)) + if (PageActive(page)) + return 0; + + if (!memclass(page->zone, classzone)) return 0; if (TryLockPage(page)) @@ -146,7 +164,7 @@ } /* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count) +static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) { pte_t * pte; unsigned long pmd_end; @@ -170,7 +188,7 @@ struct page *page = pte_page(*pte); if (VALID_PAGE(page) && !PageReserved(page)) { - count -= try_to_swap_out(mm, vma, address, pte, page); + count -= try_to_swap_out(mm, vma, address, pte, page, classzone); if (!count) { address += PAGE_SIZE; break; @@ -185,7 +203,7 @@ } /* mm->page_table_lock is held. 
mmap_sem is not held */ -static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count) +static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) { pmd_t * pmd; unsigned long pgd_end; @@ -205,7 +223,7 @@ end = pgd_end; do { - count = swap_out_pmd(mm, vma, pmd, address, end, count); + count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); if (!count) break; address = (address + PMD_SIZE) & PMD_MASK; @@ -215,7 +233,7 @@ } /* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count) +static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count, zone_t * classzone) { pgd_t *pgdir; unsigned long end; @@ -230,7 +248,7 @@ if (address >= end) BUG(); do { - count = swap_out_pgd(mm, vma, pgdir, address, end, count); + count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); if (!count) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; @@ -245,7 +263,7 @@ /* * Returns remaining count of pages to be swapped out by followup call. */ -static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter) +static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone) { unsigned long address; struct vm_area_struct* vma; @@ -267,7 +285,7 @@ address = vma->vm_start; for (;;) { - count = swap_out_vma(mm, vma, address, count); + count = swap_out_vma(mm, vma, address, count, classzone); vma = vma->vm_next; if (!vma) break; @@ -284,14 +302,14 @@ return count; } -static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, int nr_pages)); -static int swap_out(unsigned int priority, unsigned int gfp_mask, int nr_pages) +static int FASTCALL(swap_out(zone_t * classzone, unsigned int gfp_mask)); +static int swap_out(zone_t * classzone, unsigned int gfp_mask) { - int counter; + int counter, nr_pages = SWAP_CLUSTER_MAX; struct mm_struct *mm; /* Then, look at the other mm's */ - counter = mmlist_nr / priority; + counter = mmlist_nr; do { if (unlikely(current->need_resched)) { __set_current_state(TASK_RUNNING); @@ -312,7 +330,7 @@ atomic_inc(&mm->mm_users); spin_unlock(&mmlist_lock); - nr_pages = swap_out_mm(mm, nr_pages, &counter); + nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone); mmput(mm); @@ -327,13 +345,13 @@ return 0; } -static int FASTCALL(shrink_cache(int nr_pages, int max_scan, unsigned int gfp_mask)); -static int shrink_cache(int nr_pages, int max_scan, unsigned int gfp_mask) +static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * mapped)); +static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * mapped) { struct list_head * entry; + int __mapped = 0; - spin_lock(&pagemap_lru_lock); - while (max_scan && (entry = inactive_list.prev) != &inactive_list) { + while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) { struct page * page; if (unlikely(current->need_resched)) { @@ -351,16 +369,17 @@ list_del(entry); list_add(entry, &inactive_list); - if (PageTestandClearReferenced(page)) + + if (!memclass(page->zone, classzone)) continue; max_scan--; - if (unlikely(page_zone_plenty(page))) - continue; /* Racy check to 
avoid trylocking when not worthwhile */ - if (!page->buffers && page_count(page) != 1) + if (!page->buffers && page_count(page) != 1) { + __mapped++; continue; + } /* * The page is locked. IO in progress? @@ -457,13 +476,20 @@ /* * this is the non-racy check, it is critical to check * PageDirty _after_ we made sure the page is freeable - * so not in use by anybody. + * so not in use by anybody. At this point we're + * guaranteed that page->buffers is NULL, nobody + * can refill page->buffers under us because we still + * hold the page lock. */ - if (!is_page_cache_freeable(page) || PageDirty(page)) { + if (page_count(page) > 1) { + __mapped++; + page_was_dirty: spin_unlock(&pagecache_lock); UnlockPage(page); continue; } + if (unlikely(PageDirty(page))) + goto page_was_dirty; /* point of no return */ if (likely(!PageSwapCache(page))) { @@ -489,6 +515,7 @@ } spin_unlock(&pagemap_lru_lock); + *mapped = __mapped; return nr_pages; } @@ -499,74 +526,93 @@ * We move them the other way when we see the * reference bit on the page. */ -static void refill_inactive(int nr_pages) +static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone)); +static void refill_inactive(int nr_pages, zone_t * classzone) { struct list_head * entry; - spin_lock(&pagemap_lru_lock); entry = active_list.prev; - while (nr_pages-- && entry != &active_list) { + while (nr_pages && entry != &active_list) { struct page * page; page = list_entry(entry, struct page, lru); entry = entry->prev; + + if (!memclass(page->zone, classzone)) + continue; + if (PageTestandClearReferenced(page)) { list_del(&page->lru); list_add(&page->lru, &active_list); continue; } + nr_pages--; + del_page_from_active_list(page); add_page_to_inactive_list(page); + SetPageReferenced(page); + } + if (entry != &active_list) { + list_del(&active_list); + list_add(&active_list, entry); } - spin_unlock(&pagemap_lru_lock); } -static int FASTCALL(shrink_caches(int priority, unsigned int gfp_mask, int nr_pages)); -static int shrink_caches(int priority, unsigned int gfp_mask, int nr_pages) +static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * force_paging)); +static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * force_paging) { - int max_scan; - int chunk_size = nr_pages; + int max_scan, orig_nr_pages = nr_pages, mapped; unsigned long ratio; nr_pages -= kmem_cache_reap(gfp_mask); if (nr_pages <= 0) return 0; - nr_pages = chunk_size; - /* try to keep the active list 2/3 of the size of the cache */ - ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2); - refill_inactive(ratio); - - max_scan = nr_inactive_pages / priority; - nr_pages = shrink_cache(nr_pages, max_scan, gfp_mask); - if (nr_pages <= 0) - return 0; + spin_lock(&pagemap_lru_lock); + ratio = (unsigned long) orig_nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_balance_ratio) + 1); + refill_inactive(ratio, classzone); - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); -#ifdef CONFIG_QUOTA - shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); -#endif + max_scan = classzone->nr_inactive_pages / vm_scan_ratio; + nr_pages = shrink_cache(orig_nr_pages, max_scan, classzone, gfp_mask, &mapped); + *force_paging = 0; + if ((unsigned long) mapped * vm_mapped_ratio > max_scan) + *force_paging = 1; return nr_pages; } -int try_to_free_pages(unsigned int gfp_mask, unsigned int order) +static int 
FASTCALL(check_classzone_need_balance(zone_t * classzone)); + +int try_to_free_pages(zone_t * classzone, unsigned int gfp_mask, unsigned int order) { int ret = 0; - int priority = DEF_PRIORITY; - int nr_pages = SWAP_CLUSTER_MAX; - do { - nr_pages = shrink_caches(priority, gfp_mask, nr_pages); - if (nr_pages <= 0) - return 1; + for (;;) { + int tries = vm_scan_ratio << 2, force_paging; + int nr_pages = SWAP_CLUSTER_MAX; - ret |= swap_out(priority, gfp_mask, SWAP_CLUSTER_MAX << 2); - } while (--priority); + do { + nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &force_paging); + if (force_paging || nr_pages > 0) + ret |= swap_out(classzone, gfp_mask); + if (nr_pages <= 0) + return 1; + + shrink_dcache_memory(vm_scan_ratio, gfp_mask); + shrink_icache_memory(1, gfp_mask); + } while (--tries); + + if (likely(current->pid != 1)) + break; + if (!check_classzone_need_balance(classzone)) + break; + current->policy |= SCHED_YIELD; + __set_current_state(TASK_RUNNING); + schedule(); + } - return ret; + return 0; } DECLARE_WAIT_QUEUE_HEAD(kswapd_wait); @@ -595,10 +641,10 @@ schedule(); if (!zone->need_balance) continue; - if (!try_to_free_pages(GFP_KSWAPD, 0)) { + if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) { zone->need_balance = 0; __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); + schedule_timeout(HZ*5); continue; } if (check_classzone_need_balance(zone)) @@ -621,9 +667,6 @@ do need_more_balance |= kswapd_balance_pgdat(pgdat); while ((pgdat = pgdat->node_next)); - if (need_more_balance && out_of_memory()) { - oom_kill(); - } } while (need_more_balance); }
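
A few notes on the subtler parts of the patch follow; the C snippets are stand-alone sketches written for illustration under stated assumptions, not code taken from (or meant to apply to) the tree.

First, the per-classzone LRU accounting added in include/linux/swap.h: the new inc_nr_active_pages()/dec_nr_active_pages() macros do not keep a strictly per-zone count. A page charges its own zone and every higher zone of the same node, so zone->nr_active_pages effectively reads as "active pages that an allocation limited to this classzone could hope to reclaim", which is what shrink_caches() and balance_classzone() want to look at. A minimal sketch of the same walk, with simplified types:

        /* Stand-alone sketch of the classzone-wide counters; simplified types. */
        #include <stdio.h>

        #define NR_ZONES 3              /* e.g. ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM */

        struct zone { int nr_active_pages; };
        struct node { struct zone zones[NR_ZONES]; };

        /* Mirror of inc_nr_active_pages(): charge the page's own zone and
         * every higher zone of the node, i.e. every classzone it belongs to. */
        static void inc_nr_active(struct node *nd, int page_zone)
        {
                struct zone *z = nd->zones + page_zone;
                struct zone *overflow = nd->zones + NR_ZONES;

                while (z < overflow)
                        (z++)->nr_active_pages++;
        }

        int main(void)
        {
                struct node nd = {{{0}}};
                int i;

                inc_nr_active(&nd, 0);          /* one ZONE_DMA page    */
                inc_nr_active(&nd, 1);          /* one ZONE_NORMAL page */

                for (i = 0; i < NR_ZONES; i++)  /* prints: 1 2 2        */
                        printf("%d ", nd.zones[i].nr_active_pages);
                printf("\n");
                return 0;
        }

This is also why nr_free_buffer_pages() can now add the first (widest allowed) zone's nr_active_pages + nr_inactive_pages instead of the global counters: that single zone already accounts for everything below it.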
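
Second, the per-task local free list: the three loose task_struct fields (local_pages, allocation_order, nr_local_pages) are regrouped into one struct local_pages that also records the classzone being balanced, and fork() initialises local_pages.list while order and classzone are (re)set by balance_classzone() right before try_to_free_pages() runs. The sketch below shows the shape of the data and mirrors the test added to __free_pages_ok(), with memclass() collapsed to a plain flag for illustration:

        /* Sketch of the regrouped per-task local free list; kernel types elided. */
        struct list_head { struct list_head *next, *prev; };
        struct zone_struct;                     /* opaque here */

        struct local_pages {
                struct list_head list;          /* pages freed while PF_FREE_PAGES is set */
                unsigned int order;             /* order the blocked allocation asked for */
                unsigned int nr;                /* pages currently parked on the list     */
                struct zone_struct *classzone;  /* classzone the allocation is balancing  */
        };

        /* Mirror of the __free_pages_ok() test in the patch: park the page
         * locally only if it can still help the allocation being balanced. */
        static int keep_page_local(const struct local_pages *lp, int page_in_classzone)
        {
                if ((lp->nr && !lp->order) || !page_in_classzone)
                        return 0;               /* back to the buddy free lists */
                return 1;
        }

Because the free path now filters by classzone, balance_classzone() can turn the old silent skip into a BUG() when it finds a parked page outside the classzone.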
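
Third, one subtle change in the reworked refill_inactive(): the list_del(&active_list)/list_add(&active_list, entry) pair at the end moves the list head itself rather than any page. It re-inserts the active_list head at the point where the scan stopped, so the next call, which again starts from active_list.prev, resumes the round-robin there instead of re-aging the same tail pages. A toy demonstration with a kernel-style circular list (stand-alone; struct page simplified so that lru is its first member and a plain cast stands in for list_entry()):

        /* Toy demo of the active-list head rotation at the end of refill_inactive(). */
        #include <stdio.h>

        struct list_head { struct list_head *next, *prev; };

        static void list_init(struct list_head *h) { h->next = h->prev = h; }

        static void list_del(struct list_head *e)
        {
                e->prev->next = e->next;
                e->next->prev = e->prev;
        }

        /* Insert new right after head (kernel list_add() semantics). */
        static void list_add(struct list_head *new, struct list_head *head)
        {
                new->prev = head;
                new->next = head->next;
                head->next->prev = new;
                head->next = new;
        }

        struct page { struct list_head lru; int id; };

        int main(void)
        {
                struct list_head active;
                struct page p[5];
                struct list_head *entry;
                int scanned;

                list_init(&active);
                for (int i = 4; i >= 0; i--) {  /* list order ends up 0,1,2,3,4 */
                        p[i].id = i;
                        list_add(&p[i].lru, &active);
                }

                /* Scan two pages from the tail (4, then 3).  In the real code they
                 * would be re-queued or moved to the inactive list; here we only
                 * track the cursor the way refill_inactive() does. */
                entry = active.prev;
                for (scanned = 0; scanned < 2; scanned++)
                        entry = entry->prev;

                /* Rotate the head so the next pass resumes where this one stopped. */
                if (entry != &active) {
                        list_del(&active);
                        list_add(&active, entry);
                }

                /* Prints: next tail: page 2 */
                printf("next tail: page %d\n", ((struct page *) active.prev)->id);
                return 0;
        }

Note also that refill_inactive() and shrink_cache() now run under a single pagemap_lru_lock acquisition taken in shrink_caches(), instead of each taking the lock on its own.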
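
Finally, the three tunables exported through sysctl (/proc/sys/vm/vm_scan_ratio, vm_mapped_ratio and vm_balance_ratio; their defaults sit at the top of mm/vmscan.c) all act inside shrink_caches(): each pass may scan nr_inactive/vm_scan_ratio pages of the inactive list, the active list is rotated in proportion to nr_active/(nr_inactive*vm_balance_ratio), and swap_out() is forced early when the scan hits too many mapped pages. A worked example of that arithmetic, as a stand-alone C program with made-up list sizes; it mirrors the expressions in the patch rather than calling into the kernel:

        /* Worked example of the shrink_caches() arithmetic with made-up numbers. */
        #include <stdio.h>

        int vm_scan_ratio = 16, vm_mapped_ratio = 8, vm_balance_ratio = 32; /* example values */

        int main(void)
        {
                unsigned long nr_active = 60000, nr_inactive = 20000; /* classzone LRU sizes */
                int nr_pages = 32;      /* SWAP_CLUSTER_MAX */
                int mapped = 400;       /* busy/mapped pages seen by shrink_cache() */

                /* Slice of the inactive list one pass is allowed to scan. */
                int max_scan = nr_inactive / vm_scan_ratio;

                /* How far to rotate the active list towards the inactive one. */
                unsigned long refill = (unsigned long) nr_pages * nr_active /
                                       (nr_inactive * vm_balance_ratio + 1);

                /* Too many mapped pages per scan -> ask the caller to swap_out() early. */
                int force_paging = (unsigned long) mapped * vm_mapped_ratio > max_scan;

                /* With these numbers: max_scan=1250 refill=2 force_paging=1 */
                printf("max_scan=%d refill=%lu force_paging=%d\n",
                       max_scan, refill, force_paging);
                return 0;
        }

In practice: raising vm_scan_ratio makes each pass touch a smaller slice of the inactive list, raising vm_balance_ratio lets the active cache grow larger before it is rotated, and raising vm_mapped_ratio makes paging kick in earlier, since fewer mapped pages per scan are enough to trigger swap_out().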