diff -urNp x-ref/arch/sparc64/mm/init.c x/arch/sparc64/mm/init.c --- x-ref/arch/sparc64/mm/init.c 2002-11-29 02:22:58.000000000 +0100 +++ x/arch/sparc64/mm/init.c 2003-01-30 19:52:36.000000000 +0100 @@ -1638,7 +1638,7 @@ void __init mem_init(void) * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. */ - mem_map_zero = _alloc_pages(GFP_KERNEL, 0); + mem_map_zero = alloc_pages(GFP_KERNEL, 0); if (mem_map_zero == NULL) { prom_printf("paging_init: Cannot alloc zero page.\n"); prom_halt(); diff -urNp x-ref/include/asm-alpha/max_numnodes.h x/include/asm-alpha/max_numnodes.h --- x-ref/include/asm-alpha/max_numnodes.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-alpha/max_numnodes.h 2003-01-30 19:52:42.000000000 +0100 @@ -0,0 +1,16 @@ +#ifndef _ASM_MAX_NUMNODES_H +#define _ASM_MAX_NUMNODES_H + +#include <linux/config.h> +#include <asm/machvec.h> + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL) +#define MAX_NUMNODES 128 /* marvel */ +#elif defined(CONFIG_ALPHA_WILDFIRE) +# include <asm/core_wildfire.h> +# define MAX_NUMNODES WILDFIRE_MAX_QBB +#else +# define MAX_NUMNODES 1 +#endif + +#endif diff -urNp x-ref/include/asm-alpha/mmzone.h x/include/asm-alpha/mmzone.h --- x-ref/include/asm-alpha/mmzone.h 2003-01-30 19:52:26.000000000 +0100 +++ x/include/asm-alpha/mmzone.h 2003-01-30 19:52:36.000000000 +0100 @@ -46,7 +46,6 @@ extern plat_pg_data_t *plat_node_data[]; (alpha_mv.node_mem_size \ ? alpha_mv.node_mem_size(nid) \ : ((nid) ? (0UL) : (~0UL))) -#define MAX_NUMNODES 128 /* marvel */ #define PHYSADDR_TO_NID(pa) ALPHA_PA_TO_NID(pa) #define PLAT_NODE_DATA(n) (plat_node_data[(n)]) @@ -67,8 +66,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, } #endif -#ifdef CONFIG_DISCONTIGMEM - /* * Following are macros that each numa implmentation must define.
*/ @@ -122,6 +119,4 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, #define numa_node_id() cputonode(smp_processor_id()) #endif /* CONFIG_NUMA */ -#endif /* CONFIG_DISCONTIGMEM */ - #endif /* _ASM_MMZONE_H_ */ diff -urNp x-ref/include/linux/mm.h x/include/linux/mm.h --- x-ref/include/linux/mm.h 2003-01-30 19:52:32.000000000 +0100 +++ x/include/linux/mm.h 2003-01-30 19:52:36.000000000 +0100 @@ -416,7 +416,6 @@ extern mem_map_t * mem_map; * can allocate highmem pages, the *get*page*() variants return * virtual kernel addresses to the allocated page(s). */ -extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); @@ -427,7 +426,13 @@ static inline struct page * alloc_pages( */ if (order >= MAX_ORDER) return NULL; - return _alloc_pages(gfp_mask, order); + /* + * we get the zone list from the current node and the gfp_mask. + * This zone list contains a maximum of + * MAXNODES*MAX_NR_ZONES zones. + */ + return __alloc_pages(gfp_mask, order, + NODE_DATA(numa_node_id())->node_zonelists + (gfp_mask & GFP_ZONEMASK)); } #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) diff -urNp x-ref/include/linux/mmzone.h x/include/linux/mmzone.h --- x-ref/include/linux/mmzone.h 2003-01-30 19:52:32.000000000 +0100 +++ x/include/linux/mmzone.h 2003-01-30 19:52:36.000000000 +0100 @@ -114,8 +114,14 @@ typedef struct zone_struct { * so despite the zonelist table being relatively big, the cache * footprint of this construct is very small. 
*/ +#ifndef CONFIG_DISCONTIGMEM +#define MAX_NUMNODES 1 +#else +#include <asm/max_numnodes.h> +#endif /* !CONFIG_DISCONTIGMEM */ + typedef struct zonelist_struct { - zone_t * zones [MAX_NR_ZONES+1]; // NULL delimited + zone_t * zones [MAX_NUMNODES * MAX_NR_ZONES+1]; // NULL delimited } zonelist_t; #define GFP_ZONEMASK 0x0f @@ -188,6 +194,7 @@ extern void show_free_areas_core(pg_data extern void free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, unsigned long *zones_size, unsigned long paddr, unsigned long *zholes_size, struct page *pmap); +extern void build_all_zonelists(void); extern pg_data_t contig_page_data; diff -urNp x-ref/init/main.c x/init/main.c --- x-ref/init/main.c 2003-01-30 19:52:30.000000000 +0100 +++ x/init/main.c 2003-01-30 19:52:36.000000000 +0100 @@ -343,6 +343,7 @@ asmlinkage void __init start_kernel(void lock_kernel(); printk(linux_banner); setup_arch(&command_line); + build_all_zonelists(); printk("Kernel command line: %s\n", saved_command_line); parse_options(command_line); trap_init(); diff -urNp x-ref/kernel/ksyms.c x/kernel/ksyms.c --- x-ref/kernel/ksyms.c 2003-01-30 19:52:34.000000000 +0100 +++ x/kernel/ksyms.c 2003-01-30 19:52:36.000000000 +0100 @@ -91,7 +91,6 @@ EXPORT_SYMBOL(exit_fs); EXPORT_SYMBOL(exit_sighand); /* internal kernel memory management */ -EXPORT_SYMBOL(_alloc_pages); EXPORT_SYMBOL(__alloc_pages); EXPORT_SYMBOL(alloc_pages_node); EXPORT_SYMBOL(__get_free_pages); @@ -113,7 +112,10 @@ EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(__vmalloc); EXPORT_SYMBOL(vmalloc_to_page); +#ifndef CONFIG_DISCONTIGMEM +EXPORT_SYMBOL(contig_page_data); EXPORT_SYMBOL(mem_map); +#endif EXPORT_SYMBOL(remap_page_range); EXPORT_SYMBOL(max_mapnr); EXPORT_SYMBOL(high_memory); diff -urNp x-ref/mm/numa.c x/mm/numa.c --- x-ref/mm/numa.c 2002-01-22 18:53:56.000000000 +0100 +++ x/mm/numa.c 2003-01-30 19:52:36.000000000 +0100 @@ -82,49 +82,4 @@ void __init free_area_init_node(int nid, memset(pgdat->valid_addr_bitmap, 0, size); } -static struct page
* alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask, - unsigned int order) -{ - return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK)); -} - -/* - * This can be refined. Currently, tries to do round robin, instead - * should do concentratic circle search, starting from current node. - */ -struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - struct page *ret = 0; - pg_data_t *start, *temp; -#ifndef CONFIG_NUMA - unsigned long flags; - static pg_data_t *next = 0; -#endif - - if (order >= MAX_ORDER) - return NULL; -#ifdef CONFIG_NUMA - temp = NODE_DATA(numa_node_id()); -#else - spin_lock_irqsave(&node_lock, flags); - if (!next) next = pgdat_list; - temp = next; - next = next->node_next; - spin_unlock_irqrestore(&node_lock, flags); -#endif - start = temp; - while (temp) { - if ((ret = alloc_pages_pgdat(temp, gfp_mask, order))) - return(ret); - temp = temp->node_next; - } - temp = pgdat_list; - while (temp != start) { - if ((ret = alloc_pages_pgdat(temp, gfp_mask, order))) - return(ret); - temp = temp->node_next; - } - return(0); -} - #endif /* CONFIG_DISCONTIGMEM */ diff -urNp x-ref/mm/page_alloc.c x/mm/page_alloc.c --- x-ref/mm/page_alloc.c 2003-01-30 19:52:32.000000000 +0100 +++ x/mm/page_alloc.c 2003-01-30 19:52:36.000000000 +0100 @@ -313,14 +313,6 @@ static struct page * rmqueue(zone_t *zon return NULL; } -#ifndef CONFIG_DISCONTIGMEM -struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - return __alloc_pages(gfp_mask, order, - contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK)); -} -#endif - static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *)); static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) { @@ -728,13 +720,41 @@ void show_free_areas(void) /* * Builds allocation fallback zone lists. 
*/ -static inline void build_zonelists(pg_data_t *pgdat) +static int __init build_zonelists_node(pg_data_t *pgdat, zonelist_t *zonelist, int j, int k) +{ + switch (k) { + zone_t *zone; + default: + BUG(); + case ZONE_HIGHMEM: + zone = pgdat->node_zones + ZONE_HIGHMEM; + if (zone->size) { +#ifndef CONFIG_HIGHMEM + BUG(); +#endif + zonelist->zones[j++] = zone; + } + case ZONE_NORMAL: + zone = pgdat->node_zones + ZONE_NORMAL; + if (zone->size) + zonelist->zones[j++] = zone; + case ZONE_DMA: + zone = pgdat->node_zones + ZONE_DMA; + if (zone->size) + zonelist->zones[j++] = zone; + } + + return j; +} + +static void __init build_zonelists(pg_data_t *pgdat) { - int i, j, k; + int i, j, k, node, local_node; + local_node = pgdat->node_id; + printk("Building zonelist for node : %d\n", local_node); for (i = 0; i <= GFP_ZONEMASK; i++) { zonelist_t *zonelist; - zone_t *zone; zonelist = pgdat->node_zonelists + i; memset(zonelist, 0, sizeof(*zonelist)); @@ -746,33 +766,32 @@ static inline void build_zonelists(pg_da if (i & __GFP_DMA) k = ZONE_DMA; - switch (k) { - default: - BUG(); - /* - * fallthrough: - */ - case ZONE_HIGHMEM: - zone = pgdat->node_zones + ZONE_HIGHMEM; - if (zone->size) { -#ifndef CONFIG_HIGHMEM - BUG(); -#endif - zonelist->zones[j++] = zone; - } - case ZONE_NORMAL: - zone = pgdat->node_zones + ZONE_NORMAL; - if (zone->size) - zonelist->zones[j++] = zone; - case ZONE_DMA: - zone = pgdat->node_zones + ZONE_DMA; - if (zone->size) - zonelist->zones[j++] = zone; - } + j = build_zonelists_node(pgdat, zonelist, j, k); + /* + * Now we build the zonelist so that it contains the zones + * of all the other nodes. 
+ * We don't want to pressure a particular node, so when + * building the zones for node N, we make sure that the + * zones coming right after the local ones are those from + * node N+1 (modulo N) + */ + for (node = local_node + 1; node < numnodes; node++) + j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + for (node = 0; node < local_node; node++) + j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + zonelist->zones[j++] = NULL; } } +void __init build_all_zonelists(void) +{ + int i; + + for(i = 0 ; i < numnodes ; i++) + build_zonelists(NODE_DATA(i)); +} + /* * Helper functions to size the waitqueue hash table. * Essentially these want to choose hash table sizes sufficiently @@ -1012,7 +1031,6 @@ void __init free_area_init_core(int nid, (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size); } } - build_zonelists(pgdat); } void __init free_area_init(unsigned long *zones_size)