From: Andi Kleen

Change the core VM to use alloc_page_vma() instead of alloc_page().

Change the swap readahead to follow the policy of the VMA.

---

 25-akpm/include/linux/swap.h |    7 ++++---
 25-akpm/mm/memory.c          |   43 ++++++++++++++++++++++++++++++++++++-------
 25-akpm/mm/swap_state.c     |    5 +++--
 25-akpm/mm/swapfile.c       |    2 +-
 4 files changed, 44 insertions(+), 13 deletions(-)

diff -puN include/linux/swap.h~numa-api-anon-memory-policy include/linux/swap.h
--- 25/include/linux/swap.h~numa-api-anon-memory-policy	Thu Apr  8 16:42:03 2004
+++ 25-akpm/include/linux/swap.h	Thu Apr  8 16:42:03 2004
@@ -151,7 +151,7 @@ struct swap_list_t {
 extern void out_of_memory(void);
 
 /* linux/mm/memory.c */
-extern void swapin_readahead(swp_entry_t);
+extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
@@ -200,7 +200,8 @@ extern int move_from_swap_cache(struct p
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page * lookup_swap_cache(swp_entry_t);
-extern struct page * read_swap_cache_async(swp_entry_t);
+extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
+					   unsigned long addr);
 
 /* linux/mm/swapfile.c */
 extern int total_swap_pages;
@@ -243,7 +244,7 @@ extern spinlock_t swaplock;
 #define free_swap_and_cache(swp)	/*NOTHING*/
 #define swap_duplicate(swp)		/*NOTHING*/
 #define swap_free(swp)			/*NOTHING*/
-#define read_swap_cache_async(swp)	NULL
+#define read_swap_cache_async(swp,vma,addr)	NULL
 #define lookup_swap_cache(swp)		NULL
 #define valid_swaphandles(swp, off)	0
 #define can_share_swap_page(p)		0
diff -puN mm/memory.c~numa-api-anon-memory-policy mm/memory.c
--- 25/mm/memory.c~numa-api-anon-memory-policy	Thu Apr  8 16:42:03 2004
+++ 25-akpm/mm/memory.c	Thu Apr  8 16:42:03 2004
@@ -1105,7 +1105,7 @@ static int do_wp_page(struct mm_struct *
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain)
 		goto no_pte_chain;
-	new_page = alloc_page(GFP_HIGHUSER);
+	new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 	if (!new_page)
 		goto no_new_page;
 	copy_cow_page(old_page,new_page,address);
@@ -1259,9 +1259,17 @@ EXPORT_SYMBOL(vmtruncate);
  * (1 << page_cluster) entries in the swap area. This method is chosen
  * because it doesn't cost us any seek time.  We also make sure to queue
  * the 'original' request together with the readahead ones...
+ *
+ * This has been extended to use the NUMA policies from the mm triggering
+ * the readahead.
+ *
+ * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
  */
-void swapin_readahead(swp_entry_t entry)
+void swapin_readahead(swp_entry_t entry, unsigned long addr, struct vm_area_struct *vma)
 {
+#ifdef CONFIG_NUMA
+	struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
+#endif
 	int i, num;
 	struct page *new_page;
 	unsigned long offset;
@@ -1273,10 +1281,31 @@ void swapin_readahead(swp_entry_t entry)
 	for (i = 0; i < num; offset++, i++) {
 		/* Ok, do the async read-ahead now */
 		new_page = read_swap_cache_async(swp_entry(swp_type(entry),
-							   offset));
+							   offset), vma, addr);
 		if (!new_page)
 			break;
 		page_cache_release(new_page);
+#ifdef CONFIG_NUMA
+		/*
+		 * Find the next applicable VMA for the NUMA policy.
+		 */
+		addr += PAGE_SIZE;
+		if (addr == 0)
+			vma = NULL;
+		if (vma) {
+			if (addr >= vma->vm_end) {
+				vma = next_vma;
+				next_vma = vma ? vma->vm_next : NULL;
+			}
+			if (vma && addr < vma->vm_start)
+				vma = NULL;
+		} else {
+			if (next_vma && addr >= next_vma->vm_start) {
+				vma = next_vma;
+				next_vma = vma->vm_next;
+			}
+		}
+#endif
 	}
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 }
@@ -1299,8 +1328,8 @@ static int do_swap_page(struct mm_struct
 	spin_unlock(&mm->page_table_lock);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		swapin_readahead(entry);
-		page = read_swap_cache_async(entry);
+		swapin_readahead(entry, address, vma);
+		page = read_swap_cache_async(entry, vma, address);
 		if (!page) {
 			/*
 			 * Back out if somebody else faulted in this pte while
@@ -1405,7 +1434,7 @@ do_anonymous_page(struct mm_struct *mm,
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
 
-		page = alloc_page(GFP_HIGHUSER);
+		page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
 		if (!page)
 			goto no_mem;
 		clear_user_highpage(page, addr);
@@ -1497,7 +1526,7 @@ retry:
 	 * Should we do an early C-O-W break?
 	 */
 	if (write_access && !(vma->vm_flags & VM_SHARED)) {
-		struct page * page = alloc_page(GFP_HIGHUSER);
+		struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!page)
 			goto oom;
 		copy_user_highpage(page, new_page, address);
diff -puN mm/swapfile.c~numa-api-anon-memory-policy mm/swapfile.c
--- 25/mm/swapfile.c~numa-api-anon-memory-policy	Thu Apr  8 16:42:03 2004
+++ 25-akpm/mm/swapfile.c	Thu Apr  8 16:42:03 2004
@@ -670,7 +670,7 @@ static int try_to_unuse(unsigned int typ
 		 */
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
-		page = read_swap_cache_async(entry);
+		page = read_swap_cache_async(entry, NULL, 0);
 		if (!page) {
 			/*
 			 * Either swap_duplicate() failed because entry
diff -puN mm/swap_state.c~numa-api-anon-memory-policy mm/swap_state.c
--- 25/mm/swap_state.c~numa-api-anon-memory-policy	Thu Apr  8 16:42:03 2004
+++ 25-akpm/mm/swap_state.c	Thu Apr  8 16:42:03 2004
@@ -325,7 +325,8 @@ struct page * lookup_swap_cache(swp_entr
  * A failure return means that either the page allocation failed or that
  * the swap entry is no longer in use.
  */
-struct page * read_swap_cache_async(swp_entry_t entry)
+struct page *read_swap_cache_async(swp_entry_t entry,
+		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
 	int err;
@@ -349,7 +350,7 @@ struct page * read_swap_cache_async(swp_
 		 * Get a new page to read into from swap.
 		 */
 		if (!new_page) {
-			new_page = alloc_page(GFP_HIGHUSER);
+			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
 		}
_
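
For reviewers who want to see the CONFIG_NUMA readahead walk above in
isolation: the loop keeps (vma, next_vma) in step with the advancing address
so each readahead page is attributed to the VMA that covers it, with
vma == NULL meaning "no mapping here, fall back to the process default
policy", which is what alloc_page_vma() then uses to pick a node.  Below is
a minimal userspace sketch of just that walk; struct vma, the policy
strings, and the sample address layout are simplified stand-ins for
illustration, not the kernel's types.

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	/* Simplified stand-in for vm_area_struct (illustration only). */
	struct vma {
		unsigned long vm_start, vm_end;	/* covers [vm_start, vm_end) */
		struct vma *vm_next;		/* list sorted by address */
		const char *policy;		/* stand-in for vma->vm_policy */
	};

	/* Mirrors the CONFIG_NUMA advance step in swapin_readahead() above. */
	static struct vma *advance(unsigned long *addr, struct vma *vma,
				   struct vma **next_vma)
	{
		*addr += PAGE_SIZE;
		if (*addr == 0)			/* wrapped around the address space */
			vma = NULL;
		if (vma) {
			if (*addr >= vma->vm_end) {	/* walked off the current VMA */
				vma = *next_vma;
				*next_vma = vma ? vma->vm_next : NULL;
			}
			if (vma && *addr < vma->vm_start)
				vma = NULL;		/* landed in a hole */
		} else {
			if (*next_vma && *addr >= (*next_vma)->vm_start) {
				vma = *next_vma;	/* entered the next VMA */
				*next_vma = vma->vm_next;
			}
		}
		return vma;
	}

	int main(void)
	{
		/* Two adjacent VMAs, then unmapped address space. */
		struct vma b = { 0x3000, 0x5000, NULL, "interleave" };
		struct vma a = { 0x1000, 0x3000, &b,   "bind:node0" };
		struct vma *vma = &a, *next_vma = vma->vm_next;
		unsigned long addr = 0x1000;
		int i;

		/* One iteration per readahead page, as in the loop above. */
		for (i = 0; i < 8; i++) {
			printf("page %#lx allocated with policy: %s\n", addr,
			       vma ? vma->policy : "process default");
			vma = advance(&addr, vma, &next_vma);
		}
		return 0;
	}

Carrying next_vma along instead of looking up find_vma() for every page is
presumably the point of the bookkeeping: the readahead window is at most
1 << page_cluster pages, so the walk crosses only a handful of VMA
boundaries and never needs to search the VMA tree.  With !CONFIG_NUMA,
alloc_page_vma() is expected to reduce to a plain order-0 page allocation,
so the non-NUMA path is unchanged.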