diff -urN 2.2.17pre9/include/linux/sched.h 2.2.17pre9-GFP-race/include/linux/sched.h
--- 2.2.17pre9/include/linux/sched.h	Wed Jun 28 17:13:15 2000
+++ 2.2.17pre9-GFP-race/include/linux/sched.h	Fri Jun 30 04:03:09 2000
@@ -316,6 +316,7 @@
 	struct files_struct *files;
 /* memory management info */
 	struct mm_struct *mm;
+	struct list_head local_pages; int allocation_order, nr_local_pages;
 
 /* signal handlers */
 	spinlock_t sigmask_lock;	/* Protects signal and blocked */
@@ -395,7 +396,7 @@
 /* tss */	INIT_TSS, \
 /* fs */	&init_fs, \
 /* files */	&init_files, \
-/* mm */	&init_mm, \
+/* mm */	&init_mm, { &init_task.local_pages, &init_task.local_pages}, 0, 0, \
 /* signals */	SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
 /* exec cts */	0,0, \
 /* oom */	0, \
diff -urN 2.2.17pre9/kernel/fork.c 2.2.17pre9-GFP-race/kernel/fork.c
--- 2.2.17pre9/kernel/fork.c	Mon Jan 17 16:44:50 2000
+++ 2.2.17pre9-GFP-race/kernel/fork.c	Fri Jun 30 03:43:53 2000
@@ -665,6 +665,8 @@
 	p->lock_depth = -1;		/* -1 = no lock */
 	p->start_time = jiffies;
 
+	INIT_LIST_HEAD(&p->local_pages);
+
 	retval = -ENOMEM;
 	/* copy all the process information */
 	if (copy_files(clone_flags, p))
diff -urN 2.2.17pre9/mm/page_alloc.c 2.2.17pre9-GFP-race/mm/page_alloc.c
--- 2.2.17pre9/mm/page_alloc.c	Wed Jun 28 17:13:15 2000
+++ 2.2.17pre9-GFP-race/mm/page_alloc.c	Fri Jun 30 16:52:40 2000
@@ -93,34 +93,68 @@
  */
 spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
 
-static inline void free_pages_ok(unsigned long map_nr, unsigned long order, unsigned type)
-{
+#define list(x) (mem_map+(x))
+#define __free_pages_ok(map_nr, mask, area, index) \
+	nr_free_pages -= (mask); \
+	while ((mask) + (1 << (NR_MEM_LISTS-1))) { \
+		if (!test_and_change_bit((index), (area)->map)) \
+			break; \
+		(area)->count--; \
+		remove_mem_queue(list((map_nr) ^ -(mask))); \
+		(mask) <<= 1; \
+		(area)++; \
+		(index) >>= 1; \
+		(map_nr) &= (mask); \
+	} \
+	add_mem_queue(area, list(map_nr));
+
+static void free_local_pages(struct page * page) {
+	unsigned long order = page->offset;
+	unsigned int type = PageDMA(page) ? 1 : 0;
 	struct free_area_struct *area = free_area[type] + order;
-	unsigned long index = map_nr >> (1 + order);
+	unsigned long map_nr = page - mem_map;
 	unsigned long mask = (~0UL) << order;
-	unsigned long flags;
+	unsigned long index = map_nr >> (1 + order);
 
-	spin_lock_irqsave(&page_alloc_lock, flags);
+	__free_pages_ok(map_nr, mask, area, index);
+}
 
-#define list(x) (mem_map+(x))
+static inline void free_pages_ok(unsigned long map_nr, unsigned long order, unsigned type)
+{
+	struct free_area_struct *area;
+	unsigned long index;
+	unsigned long mask;
+	unsigned long flags;
+	struct page * page;
 
+	if (current->flags & PF_MEMALLOC)
+		goto local_freelist;
+ back_local_freelist:
+
+	area = free_area[type] + order;
+	index = map_nr >> (1 + order);
+	mask = (~0UL) << order;
 	map_nr &= mask;
-	nr_free_pages -= mask;
-	while (mask + (1 << (NR_MEM_LISTS-1))) {
-		if (!test_and_change_bit(index, area->map))
-			break;
-		area->count--;
-		remove_mem_queue(list(map_nr ^ -mask));
-		mask <<= 1;
-		area++;
-		index >>= 1;
-		map_nr &= mask;
-	}
-	add_mem_queue(area, list(map_nr));
-
-#undef list
 
+	spin_lock_irqsave(&page_alloc_lock, flags);
+	__free_pages_ok(map_nr, mask, area, index);
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
+	return;
+
+ local_freelist:
+	/*
+	 * This is a little subtle: if the allocation order
+	 * wanted is major than zero we'd better take all the pages
+	 * local since we must deal with fragmentation too and we
+	 * can't rely on the nr_local_pages information.
+	 */
+	if (!current->allocation_order && current->nr_local_pages)
+		goto back_local_freelist;
+
+	page = mem_map + map_nr;
+	list_add((struct list_head *) page, &current->local_pages);
+	page->offset = order;
+	current->nr_local_pages++;
 }
 
 void __free_pages(struct page *page, unsigned long order)
@@ -179,13 +213,32 @@
 	atomic_set(&map->count, 1); \
 } while (0)
 
+static void refile_local_pages(void)
+{
+	if (current->nr_local_pages) {
+		struct page * page;
+		struct list_head * entry;
+		int nr_pages = current->nr_local_pages;
+
+		while ((entry = current->local_pages.next) != &current->local_pages) {
+			list_del(entry);
+			page = (struct page *) entry;
+			free_local_pages(page);
+			if (!nr_pages--)
+				panic("__get_free_pages local_pages list corrupted I");
+		}
+		if (nr_pages)
+			panic("__get_free_pages local_pages list corrupted II");
+		current->nr_local_pages = 0;
+	}
+}
+
 unsigned long __get_free_pages(int gfp_mask, unsigned long order)
 {
 	unsigned long flags;
-	static atomic_t free_before_allocate = ATOMIC_INIT(0);
 
 	if (order >= NR_MEM_LISTS)
-		goto nopage;
+		goto out;
 
 #ifdef ATOMIC_MEMORY_DEBUGGING
 	if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
@@ -194,26 +247,24 @@
 			printk("gfp called nonatomically from interrupt %p\n",
 				__builtin_return_address(0));
 		}
-		goto nopage;
+		goto out;
 	}
 #endif
 
 	/*
+	 * Acquire lock before reading nr_free_pages to make sure it
+	 * won't change from under us.
+	 */
+	spin_lock_irqsave(&page_alloc_lock, flags);
+
+	/*
 	 * If this is a recursive call, we'd better
 	 * do our best to just allocate things without
 	 * further thought.
 	 */
 	if (!(current->flags & PF_MEMALLOC)) {
-		int freed;
 		extern struct wait_queue * kswapd_wait;
 
-		/* Somebody needs to free pages so we free some of our own. */
-		if (atomic_read(&free_before_allocate)) {
-			current->flags |= PF_MEMALLOC;
-			try_to_free_pages(gfp_mask);
-			current->flags &= ~PF_MEMALLOC;
-		}
-
 		if (nr_free_pages > freepages.low)
 			goto ok_to_allocate;
 
@@ -224,34 +275,44 @@
 		if (nr_free_pages > freepages.min)
 			goto ok_to_allocate;
 
-		current->flags |= PF_MEMALLOC;
-		atomic_inc(&free_before_allocate);
-		freed = try_to_free_pages(gfp_mask);
-		atomic_dec(&free_before_allocate);
-		current->flags &= ~PF_MEMALLOC;
-
-		/*
-		 * Re-check we're still low on memory after we blocked
-		 * for some time. Somebody may have released lots of
-		 * memory from under us while we was trying to free
-		 * the pages. We check against pages_high to be sure
-		 * to succeed only if lots of memory is been released.
-		 */
-		if (nr_free_pages > freepages.high)
-			goto ok_to_allocate;
+		if (gfp_mask & __GFP_WAIT) {
+			int freed;
+			/*
+			 * If the task is ok to sleep it's fine also
+			 * if we release irq here.
+			 */
+			spin_unlock_irq(&page_alloc_lock);
+
+			current->flags |= PF_MEMALLOC;
+			current->allocation_order = order;
+			freed = try_to_free_pages(gfp_mask);
+			current->flags &= ~PF_MEMALLOC;
 
-		if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
-			goto nopage;
+			spin_lock_irq(&page_alloc_lock);
+			refile_local_pages();
+
+			/*
+			 * Re-check we're still low on memory after we blocked
+			 * for some time. Somebody may have released lots of
+			 * memory from under us while we was trying to free
+			 * the pages. We check against pages_high to be sure
+			 * to succeed only if lots of memory is been released.
+			 */
+			if (nr_free_pages > freepages.high)
+				goto ok_to_allocate;
+
+			if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
+				goto nopage;
+		}
 	}
 ok_to_allocate:
-	spin_lock_irqsave(&page_alloc_lock, flags);
 	/* if it's not a dma request, try non-dma first */
 	if (!(gfp_mask & __GFP_DMA))
 		RMQUEUE_TYPE(order, 0);
 	RMQUEUE_TYPE(order, 1);
+ nopage:
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
-
-nopage:
+ out:
 	return 0;
 }
 
diff -urN 2.2.17pre9/mm/vmscan.c 2.2.17pre9-GFP-race/mm/vmscan.c
--- 2.2.17pre9/mm/vmscan.c	Wed Jun 28 17:13:15 2000
+++ 2.2.17pre9-GFP-race/mm/vmscan.c	Fri Jun 30 04:07:43 2000
@@ -377,7 +377,7 @@
  * cluster them so that we get good swap-out behaviour. See
  * the "free_memory()" macro for details.
  */
-static int do_try_to_free_pages(unsigned int gfp_mask)
+int try_to_free_pages(unsigned int gfp_mask)
 {
 	int priority;
 	int ret = 0;
@@ -499,7 +499,7 @@
 
 		while (nr_free_pages < freepages.high)
 		{
-			if (do_try_to_free_pages(GFP_KSWAPD))
+			if (try_to_free_pages(GFP_KSWAPD))
 			{
 				if (tsk->need_resched)
 					schedule();
@@ -510,17 +510,3 @@
 		}
 	}
 }
-
-/*
- * Called by non-kswapd processes when kswapd really cannot
- * keep up with the demand for free memory.
- */
-int try_to_free_pages(unsigned int gfp_mask)
-{
-	int retval = 1;
-
-	if (gfp_mask & __GFP_WAIT)
-		retval = do_try_to_free_pages(gfp_mask);
-	return retval;
-}
-