From: Bob Picco A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture platforms with a very sparse physical address space would likely want to select this option. For those architecture platforms that don't select the option, the code generated is equivalent to SPARSEMEM currently in -mm. I'll be posting a patch on ia64 ml which uses this new SPARSEMEM feature. ARCH_SPARSEMEM_EXTREME makes mem_section a one dimensional array of pointers to mem_sections. This two level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM -mm implementation is a one dimensional array of mem_sections which is the default SPARSEMEM configuration. The patch attempts isolates the implementation details of the physical layout of the sparsemem section array. ARCH_SPARSEMEM_EXTREME depends on 64BIT and is by default boolean false. I've boot tested under aim load ia64 configured for ARCH_SPARSEMEM_EXTREME. I've also boot tested a 4 way Opteron machine with !ARCH_SPARSEMEM_EXTREME and tested with aim. Signed-off-by: Andy Whitcroft Signed-off-by: Bob Picco Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 30 ++++++++++++++++++++++++++++-- mm/Kconfig | 9 +++++++++ mm/sparse.c | 38 ++++++++++++++++++++++++++++++++------ 3 files changed, 69 insertions(+), 8 deletions(-) diff -puN include/linux/mmzone.h~sparsemem-extreme include/linux/mmzone.h --- 25/include/linux/mmzone.h~sparsemem-extreme 2005-06-22 21:57:22.000000000 -0700 +++ 25-akpm/include/linux/mmzone.h 2005-06-22 21:57:22.000000000 -0700 @@ -487,6 +487,28 @@ struct mem_section { unsigned long section_mem_map; }; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +/* + * Should we ever require GCC 4 or later then the flat array scheme + * can be eliminated and a uniform solution for EXTREME and !EXTREME can + * be arrived at. + */ +#define SECTION_ROOT_SHIFT (PAGE_SHIFT-3) +#define SECTION_ROOT_MASK ((1UL<> SECTION_ROOT_SHIFT) +#define NR_SECTION_ROOTS (NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT) + +extern struct mem_section *mem_section[NR_SECTION_ROOTS]; + +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + if (!mem_section[SECTION_TO_ROOT(nr)]) + return NULL; + return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; +} + +#else + extern struct mem_section mem_section[NR_MEM_SECTIONS]; static inline struct mem_section *__nr_to_section(unsigned long nr) @@ -494,6 +516,10 @@ static inline struct mem_section *__nr_t return &mem_section[nr]; } +#define sparse_index_init(_sec, _nid) do {} while (0) + +#endif + /* * We use the lower bits of the mem_map pointer to store * a little bit of information. There should be at least @@ -513,12 +539,12 @@ static inline struct page *__section_mem static inline int valid_section(struct mem_section *section) { - return (section->section_mem_map & SECTION_MARKED_PRESENT); + return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int section_has_mem_map(struct mem_section *section) { - return (section->section_mem_map & SECTION_HAS_MEM_MAP); + return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int valid_section_nr(unsigned long nr) diff -puN mm/Kconfig~sparsemem-extreme mm/Kconfig --- 25/mm/Kconfig~sparsemem-extreme 2005-06-22 21:57:22.000000000 -0700 +++ 25-akpm/mm/Kconfig 2005-06-22 21:57:22.000000000 -0700 @@ -89,3 +89,12 @@ config NEED_MULTIPLE_NODES config HAVE_MEMORY_PRESENT def_bool y depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM + +# +# Architectecture platforms which require a two level mem_section in SPARSEMEM +# must select this option. This is usually for architecture platforms with +# an extremely sparse physical address space. +# +config ARCH_SPARSEMEM_EXTREME + def_bool n + depends on SPARSEMEM && 64BIT diff -puN mm/sparse.c~sparsemem-extreme mm/sparse.c --- 25/mm/sparse.c~sparsemem-extreme 2005-06-22 21:57:22.000000000 -0700 +++ 25-akpm/mm/sparse.c 2005-06-22 21:57:22.000000000 -0700 @@ -13,7 +13,26 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -struct mem_section mem_section[NR_MEM_SECTIONS]; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +struct mem_section *mem_section[NR_SECTION_ROOTS] + ____cacheline_maxaligned_in_smp; + +static void sparse_index_init(unsigned long section, int nid) +{ + unsigned long root = SECTION_TO_ROOT(section); + + if (mem_section[root]) + return; + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + if (mem_section[root]) + memset(mem_section[root], 0, PAGE_SIZE); + else + panic("memory_present: NO MEMORY\n"); +} +#else +struct mem_section mem_section[NR_MEM_SECTIONS] + ____cacheline_maxaligned_in_smp; +#endif EXPORT_SYMBOL(mem_section); /* Record a memory area against a node. */ @@ -24,8 +43,13 @@ void memory_present(int nid, unsigned lo start &= PAGE_SECTION_MASK; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); - if (!mem_section[section].section_mem_map) - mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; + struct mem_section *ms; + + sparse_index_init(section, nid); + + ms = __nr_to_section(section); + if (!ms->section_mem_map) + ms->section_mem_map = SECTION_MARKED_PRESENT; } } @@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map { struct page *map; int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + struct mem_section *ms = __nr_to_section(pnum); map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) @@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map return map; printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); - mem_section[pnum].section_mem_map = 0; + ms->section_mem_map = 0; return NULL; } @@ -114,8 +139,9 @@ void sparse_init(void) continue; map = sparse_early_mem_map_alloc(pnum); - if (map) - sparse_init_one_section(&mem_section[pnum], pnum, map); + if (!map) + continue; + sparse_init_one_section(__nr_to_section(pnum), pnum, map); } } _