From: Christoph Lameter Defining irq_stat as an array means that all of its elements are located on a single NUMA node, so accessing irq_stat from a CPU on any other node requires internode traffic. This patch makes irq_stat a per-CPU variable, which allows most accesses to be local. Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton --- 25-akpm/arch/i386/kernel/apic.c | 2 +- 25-akpm/arch/i386/kernel/io_apic.c | 2 +- 25-akpm/arch/i386/kernel/irq.c | 5 ++++- 25-akpm/arch/i386/kernel/nmi.c | 4 ++-- 25-akpm/arch/i386/kernel/process.c | 2 +- 25-akpm/include/asm-i386/hardirq.h | 7 ++++++- 6 files changed, 15 insertions(+), 7 deletions(-) diff -puN arch/i386/kernel/apic.c~per-cpu-irq-stat arch/i386/kernel/apic.c --- 25/arch/i386/kernel/apic.c~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/apic.c 2005-03-14 22:46:34.000000000 -0800 @@ -1165,7 +1165,7 @@ fastcall void smp_apic_timer_interrupt(s /* * the NMI deadlock-detector uses this. */ - irq_stat[cpu].apic_timer_irqs++; + per_cpu(irq_stat, cpu).apic_timer_irqs++; /* * NOTE! 
We'd better ACK the irq immediately, diff -puN arch/i386/kernel/io_apic.c~per-cpu-irq-stat arch/i386/kernel/io_apic.c --- 25/arch/i386/kernel/io_apic.c~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/io_apic.c 2005-03-14 22:46:34.000000000 -0800 @@ -275,7 +275,7 @@ static struct irq_cpu_info { #define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) #define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) + (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) diff -puN arch/i386/kernel/irq.c~per-cpu-irq-stat arch/i386/kernel/irq.c --- 25/arch/i386/kernel/irq.c~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/irq.c 2005-03-14 22:46:34.000000000 -0800 @@ -16,6 +16,9 @@ #include #include +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; +EXPORT_PER_CPU_SYMBOL(irq_stat); + #ifndef CONFIG_X86_LOCAL_APIC /* * 'what should we do if we get a hw irq event on an illegal vector'. @@ -246,7 +249,7 @@ skip: for (j = 0; j < NR_CPUS; j++) if (cpu_online(j)) seq_printf(p, "%10u ", - irq_stat[j].apic_timer_irqs); + per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff -puN arch/i386/kernel/nmi.c~per-cpu-irq-stat arch/i386/kernel/nmi.c --- 25/arch/i386/kernel/nmi.c~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/nmi.c 2005-03-14 22:46:34.000000000 -0800 @@ -110,7 +110,7 @@ int __init check_nmi_watchdog (void) printk(KERN_INFO "testing NMI watchdog ... 
"); for (cpu = 0; cpu < NR_CPUS; cpu++) - prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count; + prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks @@ -488,7 +488,7 @@ void nmi_watchdog_tick (struct pt_regs * */ int sum, cpu = smp_processor_id(); - sum = irq_stat[cpu].apic_timer_irqs; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs; if (last_irq_sums[cpu] == sum) { /* diff -puN arch/i386/kernel/process.c~per-cpu-irq-stat arch/i386/kernel/process.c --- 25/arch/i386/kernel/process.c~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/process.c 2005-03-14 22:46:34.000000000 -0800 @@ -162,7 +162,7 @@ void cpu_idle (void) if (!idle) idle = default_idle; - irq_stat[cpu].idle_timestamp = jiffies; + __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } schedule(); diff -puN include/asm-i386/hardirq.h~per-cpu-irq-stat include/asm-i386/hardirq.h --- 25/include/asm-i386/hardirq.h~per-cpu-irq-stat 2005-03-14 22:46:34.000000000 -0800 +++ 25-akpm/include/asm-i386/hardirq.h 2005-03-14 22:46:34.000000000 -0800 @@ -12,8 +12,13 @@ typedef struct { unsigned int apic_timer_irqs; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; -#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +extern irq_cpustat_t irq_stat[]; + +#define __ARCH_IRQ_STAT +#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) void ack_bad_irq(unsigned int irq); +#include <linux/irq_cpustat.h> #endif /* __ASM_HARDIRQ_H */ _