From: Dave Hansen pgdat->node_size_lock is basically only neeeded in one place in the normal code: show_mem(), which is the arch-specific sysrq-m printing function. Strictly speaking, the architectures not doing memory hotplug do no need this locking in show_mem(). However, they are all included for completeness. This should also make any future consolidation of all of the implementations a little more straightforward. This lock is also held in the sparsemem code during a memory removal, as sections are invalidated. This is the place there pfn_valid() is made false for a memory area that's being removed. The lock is only required when doing pfn_valid() operations on memory which the user does not already have a reference on the page, such as in show_mem(). Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton --- arch/alpha/mm/numa.c | 3 +++ arch/i386/mm/pgtable.c | 3 +++ arch/ia64/mm/discontig.c | 7 ++++++- arch/m32r/mm/init.c | 9 ++++++++- arch/parisc/mm/init.c | 3 +++ arch/ppc64/mm/init.c | 6 ++++++ include/linux/memory_hotplug.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/mmzone.h | 12 ++++++++++++ mm/page_alloc.c | 1 + 9 files changed, 76 insertions(+), 2 deletions(-) diff -puN arch/alpha/mm/numa.c~memory-hotplug-locking-node_size_lock arch/alpha/mm/numa.c --- devel/arch/alpha/mm/numa.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/alpha/mm/numa.c 2005-09-12 03:34:27.000000000 -0700 @@ -371,6 +371,8 @@ show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_node(nid) { + unsigned long flags; + pgdat_resize_lock(NODE_DATA(nid), &flags); i = node_spanned_pages(nid); while (i-- > 0) { struct page *page = nid_page_nr(nid, i); @@ -384,6 +386,7 @@ show_mem(void) else shared += page_count(page) - 1; } + pgdat_resize_unlock(NODE_DATA(nid), &flags); } printk("%ld pages of RAM\n",total); printk("%ld free pages\n",free); diff -puN arch/i386/mm/pgtable.c~memory-hotplug-locking-node_size_lock arch/i386/mm/pgtable.c --- devel/arch/i386/mm/pgtable.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/i386/mm/pgtable.c 2005-09-12 03:34:27.000000000 -0700 @@ -31,11 +31,13 @@ void show_mem(void) pg_data_t *pgdat; unsigned long i; struct page_state ps; + unsigned long flags; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -48,6 +50,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); diff -puN arch/ia64/mm/discontig.c~memory-hotplug-locking-node_size_lock arch/ia64/mm/discontig.c --- devel/arch/ia64/mm/discontig.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/ia64/mm/discontig.c 2005-09-12 03:34:27.000000000 -0700 @@ -524,9 +524,13 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - unsigned long present = pgdat->node_present_pages; + unsigned long present; + unsigned long flags; int shared = 0, cached = 0, reserved = 0; + printk("Node ID: %d\n", pgdat->node_id); + pgdat_resize_lock(pgdat, &flags); + present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { struct page *page = pgdat_page_nr(pgdat, i); if (!ia64_pfn_valid(pgdat->node_start_pfn+i)) @@ -538,6 +542,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page)-1; } + pgdat_resize_unlock(pgdat, &flags); total_present += present; total_reserved += reserved; total_cached += cached; diff -puN arch/m32r/mm/init.c~memory-hotplug-locking-node_size_lock arch/m32r/mm/init.c --- devel/arch/m32r/mm/init.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/m32r/mm/init.c 2005-09-12 03:34:27.000000000 -0700 @@ -48,6 +48,8 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -60,6 +62,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk("%d pages of RAM\n", total); printk("%d pages of HIGHMEM\n",highmem); @@ -150,10 +153,14 @@ int __init reservedpages_count(void) int reservedpages, nid, i; reservedpages = 0; - for_each_online_node(nid) + for_each_online_node(nid) { + unsigned long flags; + pgdat_resize_lock(NODE_DATA(nid), &flags); for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) if (PageReserved(nid_page_nr(nid, i))) reservedpages++; + pgdat_resize_unlock(NODE_DATA(nid), &flags); + } return reservedpages; } diff -puN arch/parisc/mm/init.c~memory-hotplug-locking-node_size_lock arch/parisc/mm/init.c --- devel/arch/parisc/mm/init.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/parisc/mm/init.c 2005-09-12 03:34:27.000000000 -0700 @@ -505,7 +505,9 @@ void show_mem(void) for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { struct page *p; + unsigned long flags; + pgdat_resize_lock(NODE_DATA(i), &flags); p = nid_page_nr(i, j) - node_start_pfn(i); total++; @@ -517,6 +519,7 @@ void show_mem(void) free++; else shared += page_count(p) - 1; + pgdat_resize_unlock(NODE_DATA(i), &flags); } } #endif diff -puN arch/ppc64/mm/init.c~memory-hotplug-locking-node_size_lock arch/ppc64/mm/init.c --- devel/arch/ppc64/mm/init.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/arch/ppc64/mm/init.c 2005-09-12 03:34:27.000000000 -0700 @@ -104,6 +104,8 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat_page_nr(pgdat, i); total++; @@ -114,6 +116,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk("%ld pages of RAM\n", total); printk("%ld reserved pages\n", reserved); @@ -649,11 +652,14 @@ void __init mem_init(void) #endif for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } + pgdat_resize_unlock(pgdat, &flags); } codesize = (unsigned long)&_etext - (unsigned long)&_stext; diff -puN /dev/null include/linux/memory_hotplug.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/include/linux/memory_hotplug.h 2005-09-12 03:34:27.000000000 -0700 @@ -0,0 +1,34 @@ +#ifndef __LINUX_MEMORY_HOTPLUG_H +#define __LINUX_MEMORY_HOTPLUG_H + +#include +#include + +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * pgdat resizing functions + */ +static inline +void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_lock_irqsave(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_lock_irqrestore(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_init(struct pglist_data *pgdat) +{ + spin_lock_init(&pgdat->node_size_lock); +} +#else /* ! CONFIG_MEMORY_HOTPLUG */ +/* + * Stub functions for when hotplug is off + */ +static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_init(struct pglist_data *pgdat) {} +#endif +#endif /* __LINUX_MEMORY_HOTPLUG_H */ diff -puN include/linux/mmzone.h~memory-hotplug-locking-node_size_lock include/linux/mmzone.h --- devel/include/linux/mmzone.h~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/include/linux/mmzone.h 2005-09-12 03:34:27.000000000 -0700 @@ -275,6 +275,16 @@ typedef struct pglist_data { struct page *node_mem_map; #endif struct bootmem_data *bdata; +#ifdef CONFIG_MEMORY_HOTPLUG + /* + * Must be held any time you expect node_start_pfn, node_present_pages + * or node_spanned_pages stay constant. Holding this will also + * guarantee that any pfn_valid() stays that way. + * + * Nests above zone->lock and zone->size_seqlock. + */ + spinlock_t node_size_lock; +#endif unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ unsigned long node_spanned_pages; /* total size of physical page @@ -295,6 +305,8 @@ typedef struct pglist_data { #endif #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) +#include + extern struct pglist_data *pgdat_list; void __get_zone_counts(unsigned long *active, unsigned long *inactive, diff -puN mm/page_alloc.c~memory-hotplug-locking-node_size_lock mm/page_alloc.c --- devel/mm/page_alloc.c~memory-hotplug-locking-node_size_lock 2005-09-12 03:34:27.000000000 -0700 +++ devel-akpm/mm/page_alloc.c 2005-09-12 03:34:27.000000000 -0700 @@ -1973,6 +1973,7 @@ static void __init free_area_init_core(s int nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; + pgdat_resize_init(pgdat); pgdat->nr_zones = 0; init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; _