author     Alex Williamson <alex.williamson@hp.com>   2005-01-03 04:16:59 -0800
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-01-03 04:16:59 -0800
commit     d841f01f2354d63fea302c7baf78b2d9eca96a96 (patch)
tree       7756c8520354909ad867039b82776c732dafc0b0 /mm
parent     d32d6f8ab2ef776261359b78127fd0251a041a90 (diff)
download   history-d841f01f2354d63fea302c7baf78b2d9eca96a96.tar.gz
[PATCH] collect page_states only from online cpus
I noticed the function __read_page_state() curiously high in a q-tools profile of a write to a software raid0 device.  Seems this is because we're checking page_states for all possible cpus, and we have NR_CPUS possible when CONFIG_HOTPLUG_CPU=y.  The default config for ia64 is now NR_CPUS=512, so on a little 8-way box, this is a significant waste of time.

The patch below updates __read_page_state() and __get_page_state() to only count page_state info for online cpus.  To keep the stats consistent, the page_alloc notifier is updated to move page_states off of the cpu going offline.

On my profile, this dropped __read_page_state() back into the noise and boosted block write performance by 5% (as measured by spew - http://spew.berlios.de).

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/page_alloc.c | 33
1 file changed, 22 insertions, 11 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1f264ba47d3309..7c081bea888e4d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -938,18 +938,18 @@ void __get_page_state(struct page_state *ret, int nr)
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+
+	cpu = first_cpu(cpu_online_map);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
-		if (!cpu_possible(cpu)) {
-			cpu++;
-			continue;
-		}
-
 		in = (unsigned long *)&per_cpu(page_states, cpu);
-		cpu++;
-		if (cpu < NR_CPUS && cpu_possible(cpu))
+
+		cpu = next_cpu(cpu, cpu_online_map);
+
+		if (cpu < NR_CPUS)
 			prefetch(&per_cpu(page_states, cpu));
+
 		out = (unsigned long *)ret;
 		for (off = 0; off < nr; off++)
 			*out++ += *in++;
@@ -976,12 +976,9 @@ unsigned long __read_page_state(unsigned offset)
 	unsigned long ret = 0;
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_online_cpu(cpu) {
 		unsigned long in;
 
-		if (!cpu_possible(cpu))
-			continue;
-
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
 		ret += *((unsigned long *)in);
 	}
@@ -1813,14 +1810,28 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 {
 	int cpu = (unsigned long)hcpu;
 	long *count;
+	unsigned long *src, *dest;
 
 	if (action == CPU_DEAD) {
+		int i;
+
 		/* Drain local pagecache count. */
 		count = &per_cpu(nr_pagecache_local, cpu);
 		atomic_add(*count, &nr_pagecache);
 		*count = 0;
 		local_irq_disable();
 		__drain_pages(cpu);
+
+		/* Add dead cpu's page_states to our own. */
+		dest = (unsigned long *)&__get_cpu_var(page_states);
+		src = (unsigned long *)&per_cpu(page_states, cpu);
+
+		for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
+				i++) {
+			dest[i] += src[i];
+			src[i] = 0;
+		}
+
		local_irq_enable();
 	}
 	return NOTIFY_OK;
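
For readers who want to play with the idea outside the kernel, here is a minimal userspace C sketch of the two techniques the patch combines: summing per-cpu counters only over online cpus, and folding a dying cpu's counters into a survivor so the totals stay consistent.  Everything in it (NCPUS, struct counters, online[], read_counter(), cpu_offline()) is invented for illustration and is not the kernel API used in the diff above.

/*
 * Userspace sketch only -- not kernel code.  Models per-cpu counters,
 * an "online" mask, and a CPU_DEAD-style handoff of a dead cpu's
 * counters to a surviving cpu.
 */
#include <stddef.h>
#include <stdio.h>

#define NCPUS 8

struct counters {
	unsigned long pgalloc;
	unsigned long pgfree;
};

static struct counters per_cpu_counters[NCPUS];
static int online[NCPUS];		/* 1 = online, 0 = offline */

/* Sum one field across online cpus only (cf. the patched __read_page_state). */
static unsigned long read_counter(size_t offset)
{
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!online[cpu])
			continue;
		sum += *(unsigned long *)((char *)&per_cpu_counters[cpu] + offset);
	}
	return sum;
}

/* Fold the dead cpu's counters into a survivor (cf. the CPU_DEAD notifier). */
static void cpu_offline(int dead, int survivor)
{
	unsigned long *src = (unsigned long *)&per_cpu_counters[dead];
	unsigned long *dst = (unsigned long *)&per_cpu_counters[survivor];
	size_t i;

	for (i = 0; i < sizeof(struct counters) / sizeof(unsigned long); i++) {
		dst[i] += src[i];
		src[i] = 0;
	}
	online[dead] = 0;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		online[cpu] = 1;
		per_cpu_counters[cpu].pgalloc = 100 + cpu;
	}

	printf("pgalloc before offline: %lu\n",
	       read_counter(offsetof(struct counters, pgalloc)));
	cpu_offline(NCPUS - 1, 0);
	printf("pgalloc after offline:  %lu\n",
	       read_counter(offsetof(struct counters, pgalloc)));
	return 0;
}

Both printf calls report the same total: counts never disappear when a cpu goes away, they just move to a surviving cpu.  That is the consistency property the notifier hunk preserves while letting the read paths skip offline cpus entirely.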