From: Andy Whitcroft This patch eliminates the false hole which can form between ZONE_NORMAL and ZONE_HIGHMEM when the 4g/4g split is enabled. Basically, the patch changes the allocation of the NUMA remap regions (the source of the holes) such that they officially fall within VMALLOC space, where they belong. -apw diff -upN reference/arch/i386/mm/discontig.c current/arch/i386/mm/discontig.c --- reference/arch/i386/mm/discontig.c 2004-03-30 21:42:13.000000000 -0800 +++ current/arch/i386/mm/discontig.c 2004-03-30 21:42:13.000000000 -0800 @@ -233,6 +233,13 @@ unsigned long __init setup_memory(void) unsigned long bootmap_size, system_start_pfn, system_max_low_pfn; unsigned long reserve_pages; + /* + * When mapping a NUMA machine we allocate the node_mem_map arrays + * from node local memory. They are then mapped directly into KVA + * between zone normal and vmalloc space. Calculate the size of + * this space and use it to adjust the boundary between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ get_memcfg_numa(); reserve_pages = calculate_numa_remap_pages(); @@ -240,7 +247,10 @@ unsigned long __init setup_memory(void) system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); find_max_pfn(); - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; + printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", + reserve_pages, max_low_pfn + reserve_pages); + printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > system_max_low_pfn) @@ -248,7 +258,6 @@ unsigned long __init setup_memory(void) printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); #endif - system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", @@ -258,15 +267,16 @@ unsigned long __init 
setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for (nid = 0; nid < numnodes; nid++) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn - node_remap_offset[nid]); + (highstart_pfn + reserve_pages) - node_remap_offset[nid]); allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for (nid = 0; nid < numnodes; nid++) find_max_pfn_node(nid); diff -upN reference/include/asm-i386/pgtable.h current/include/asm-i386/pgtable.h --- reference/include/asm-i386/pgtable.h 2004-03-30 16:24:52.000000000 -0800 +++ current/include/asm-i386/pgtable.h 2004-03-30 21:42:13.000000000 -0800 @@ -87,8 +87,8 @@ void paging_init(void); * area for the same reason. 
;) */ #define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) +#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \ + 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) #else diff -upN reference/include/linux/mm.h current/include/linux/mm.h --- reference/include/linux/mm.h 2004-03-30 16:24:47.000000000 -0800 +++ current/include/linux/mm.h 2004-03-30 21:42:13.000000000 -0800 @@ -19,6 +19,7 @@ extern unsigned long max_mapnr; extern unsigned long num_physpages; extern void * high_memory; +extern unsigned long vmalloc_earlyreserve; extern int page_cluster; #include diff -upN reference/mm/memory.c current/mm/memory.c --- reference/mm/memory.c 2004-03-30 16:24:47.000000000 -0800 +++ current/mm/memory.c 2004-03-30 21:42:13.000000000 -0800 @@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map); #endif unsigned long num_physpages; +/* + * A number of key systems in x86 including ioremap() rely on the assumption + * that high_memory defines the upper bound on direct map memory, the end + * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and + * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ void * high_memory; struct page *highmem_start_page; +unsigned long vmalloc_earlyreserve; EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(high_memory); +EXPORT_SYMBOL(vmalloc_earlyreserve); /* * We special-case the C-O-W ZERO_PAGE, because it's such