From: Andy Whitcroft This patch eliminates the false hole which can form between ZONE_NORMAL and ZONE_HIGHMEM when 4g/4g split is enabled. Basically, the patch changes the allocation of the numa remaps regions (the source of the holes) such that they officially fall within VMALLOC space, where they belong. diff -upN reference/arch/i386/mm/discontig.c current/arch/i386/mm/discontig.c --- reference/arch/i386/mm/discontig.c 2004-04-09 21:41:41.000000000 -0700 +++ current/arch/i386/mm/discontig.c 2004-04-09 21:41:41.000000000 -0700 @@ -233,6 +233,13 @@ unsigned long __init setup_memory(void) unsigned long bootmap_size, system_start_pfn, system_max_low_pfn; unsigned long reserve_pages; + /* + * When mapping a NUMA machine we allocate the node_mem_map arrays + * from node local memory. They are then mapped directly into KVA + * between zone normal and vmalloc space. Calculate the size of + * this space and use it to adjust the boundry between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ get_memcfg_numa(); reserve_pages = calculate_numa_remap_pages(); @@ -240,7 +247,10 @@ unsigned long __init setup_memory(void) system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); find_max_pfn(); - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; + printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", + reserve_pages, max_low_pfn + reserve_pages); + printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > system_max_low_pfn) @@ -248,7 +258,6 @@ unsigned long __init setup_memory(void) printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); #endif - system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", @@ -258,15 +267,16 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for (nid = 0; nid < numnodes; nid++) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn - node_remap_offset[nid]); + (highstart_pfn + reserve_pages) - node_remap_offset[nid]); allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for (nid = 0; nid < numnodes; nid++) find_max_pfn_node(nid); diff -upN reference/include/asm-i386/pgtable.h current/include/asm-i386/pgtable.h --- reference/include/asm-i386/pgtable.h 2004-04-09 21:41:40.000000000 -0700 +++ current/include/asm-i386/pgtable.h 2004-04-09 21:41:41.000000000 -0700 @@ -104,8 +104,8 @@ extern void entry_trampoline_setup(void) * area for the same reason. ;) */ #define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) +#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \ + 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) #else diff -upN reference/include/linux/mm.h current/include/linux/mm.h --- reference/include/linux/mm.h 2004-04-08 15:10:27.000000000 -0700 +++ current/include/linux/mm.h 2004-04-09 21:41:41.000000000 -0700 @@ -19,6 +19,7 @@ extern unsigned long max_mapnr; extern unsigned long num_physpages; extern void * high_memory; +extern unsigned long vmalloc_earlyreserve; extern int page_cluster; #include diff -upN reference/mm/memory.c current/mm/memory.c --- reference/mm/memory.c 2004-04-09 11:53:00.000000000 -0700 +++ current/mm/memory.c 2004-04-09 21:41:41.000000000 -0700 @@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map); #endif unsigned long num_physpages; +/* + * A number of key systems in x86 including ioremap() rely on the assumption + * that high_memory defines the upper bound on direct map memory, then end + * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and + * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ void * high_memory; struct page *highmem_start_page; +unsigned long vmalloc_earlyreserve; EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(high_memory); +EXPORT_SYMBOL(vmalloc_earlyreserve); /* * We special-case the C-O-W ZERO_PAGE, because it's such