diff options
author | Alex Williamson &lt;alex.williamson@hp.com&gt; | 2005-01-03 04:16:59 -0800
---|---|---
committer | Linus Torvalds &lt;torvalds@ppc970.osdl.org&gt; | 2005-01-03 04:16:59 -0800
commit | d841f01f2354d63fea302c7baf78b2d9eca96a96 (patch) |
tree | 7756c8520354909ad867039b82776c732dafc0b0 /mm |
parent | d32d6f8ab2ef776261359b78127fd0251a041a90 (diff) |
download | history-d841f01f2354d63fea302c7baf78b2d9eca96a96.tar.gz |
[PATCH] collect page_states only from online cpus
I noticed the function __read_page_state() curiously high in a q-tools
profile of a write to a software raid0 device. Seems this is because we're
checking page_states for all possible cpus and we have NR_CPUS possible
when CONFIG_HOTPLUG_CPU=y. The default config for ia64 is now NR_CPUS=512,
so on a little 8-way box, this is a significant waste of time. The patch
below updates __read_page_state() and __get_page_state() to only count
page_state info for online cpus. To keep the stats consistent, the
page_alloc notifier is updated to move page_states off of the cpu going
offline. On my profile, this dropped __read_page_state() back into the
noise and boosted block write performance by 5% (as measured by spew -
http://spew.berlios.de).
Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page_alloc.c | 33 |
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1f264ba47d3309..7c081bea888e4d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -938,18 +938,18 @@ void __get_page_state(struct page_state *ret, int nr)
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+
+	cpu = first_cpu(cpu_online_map);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
-		if (!cpu_possible(cpu)) {
-			cpu++;
-			continue;
-		}
-
 		in = (unsigned long *)&per_cpu(page_states, cpu);
-		cpu++;
-		if (cpu < NR_CPUS && cpu_possible(cpu))
+
+		cpu = next_cpu(cpu, cpu_online_map);
+
+		if (cpu < NR_CPUS)
 			prefetch(&per_cpu(page_states, cpu));
+
 		out = (unsigned long *)ret;
 		for (off = 0; off < nr; off++)
 			*out++ += *in++;
@@ -976,12 +976,9 @@ unsigned long __read_page_state(unsigned offset)
 	unsigned long ret = 0;
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_online_cpu(cpu) {
 		unsigned long in;
 
-		if (!cpu_possible(cpu))
-			continue;
-
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
 		ret += *((unsigned long *)in);
 	}
@@ -1813,14 +1810,28 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 {
 	int cpu = (unsigned long)hcpu;
 	long *count;
+	unsigned long *src, *dest;
 
 	if (action == CPU_DEAD) {
+		int i;
+
 		/* Drain local pagecache count. */
 		count = &per_cpu(nr_pagecache_local, cpu);
 		atomic_add(*count, &nr_pagecache);
 		*count = 0;
 		local_irq_disable();
 		__drain_pages(cpu);
+
+		/* Add dead cpu's page_states to our own. */
+		dest = (unsigned long *)&__get_cpu_var(page_states);
+		src = (unsigned long *)&per_cpu(page_states, cpu);
+
+		for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
+				i++) {
+			dest[i] += src[i];
+			src[i] = 0;
+		}
+
 		local_irq_enable();
 	}
 	return NOTIFY_OK;