From: Ashok Raj When handling writes to /proc/irq, current code is re-programming rte entries directly. This is not recommended and could potentially cause chipset's to lockup, or cause missing interrupts. CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user space irq balancers are really not doing the right thing. - Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for lack of a generic name. - added move_irq out of IRQ_BALANCE, and added this same to X86_64 - Added new proc handler for write, so we can do deferred write at irq handling time. - Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead it now shows only active cpu masks, or exactly what was set. - Provided a common move_irq implementation, instead of duplicating when using generic irq framework. Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well. MSI testing: tbd: I have cards, need to look for a x-over cable, although I did test an earlier version of this patch. Will test in a couple days. Signed-off-by: Ashok Raj Acked-by: Zwane Mwaikambo Grudgingly-acked-by: Andi Kleen Signed-off-by: Coywolf Qi Hunt Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton --- arch/i386/Kconfig | 5 + arch/i386/kernel/io_apic.c | 55 ++++++++++--------- arch/ia64/Kconfig | 5 + arch/ia64/kernel/irq.c | 39 ------------- arch/x86_64/Kconfig | 5 + arch/x86_64/kernel/io_apic.c | 102 ++++++++++++++++++++++------------- drivers/pci/msi.c | 17 +---- drivers/pci/msi.h | 5 - include/asm-ia64/hw_irq.h | 7 -- include/asm-ia64/irq.h | 6 -- include/linux/irq.h | 123 +++++++++++++++++++++++++++++++++++++++++++ kernel/irq/manage.c | 4 + kernel/irq/proc.c | 14 ++++ 13 files changed, 253 insertions(+), 134 deletions(-) diff -puN arch/i386/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/i386/Kconfig --- devel/arch/i386/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/i386/Kconfig 2005-07-08 23:11:22.000000000 -0700 @@ -1322,6 +1322,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + config X86_SMP bool depends on SMP && !X86_VOYAGER diff -puN arch/i386/kernel/io_apic.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/i386/kernel/io_apic.c --- devel/arch/i386/kernel/io_apic.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/i386/kernel/io_apic.c 2005-07-08 23:11:22.000000000 -0700 @@ -33,6 +33,7 @@ #include #include #include + #include #include #include @@ -222,13 +223,21 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; + cpumask_t tmp; + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(cpumask, tmp, CPU_MASK_ALL); + apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; @@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsi break; entry = irq_2_pin + entry->next; } + set_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsi # define Dprintk(x...) # endif -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); } } @@ -528,16 +532,12 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); + /* Since we made a change, come back sooner to * check for more variation. */ @@ -568,7 +568,8 @@ static int balanced_irq(void *unused) /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); + pending_irq_cpumask[i] = cpumask_of_cpu(0); + set_pending_irq(i, cpumask_of_cpu(0)); } for ( ; ; ) { @@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str) __setup("noirqbalance", irqbalance_disable); -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } #endif /* CONFIG_IRQBALANCE */ +#endif /* CONFIG_SMP */ #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) @@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void) } } +#endif /* * EISA Edge/Level control register, ELCR @@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(vo spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsig { int irq = vector_to_irq(vector); + move_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (uns { int irq = vector_to_irq(vector); + move_irq(vector); end_level_ioapic_irq(irq); } @@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsig unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } #endif +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -2000,7 +1998,9 @@ static struct hw_interrupt_type ioapic_e .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static struct hw_interrupt_type ioapic_level_type = { @@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_l .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff -puN arch/ia64/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/ia64/Kconfig --- devel/arch/ia64/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/ia64/Kconfig 2005-07-08 23:11:22.000000000 -0700 @@ -440,6 +440,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + source "arch/ia64/hp/sim/Kconfig" source "arch/ia64/oprofile/Kconfig" diff -puN arch/ia64/kernel/irq.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/ia64/kernel/irq.c --- devel/arch/ia64/kernel/irq.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/ia64/kernel/irq.c 2005-07-08 23:11:22.000000000 -0700 @@ -91,23 +91,8 @@ skip: } #ifdef CONFIG_SMP -/* - * This is updated when the user sets irq affinity via /proc - */ -static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; -static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)]; - static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; -/* - * Arch specific routine for deferred write to iosapic rte to reprogram - * intr destination. - */ -void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) -{ - pending_irq_cpumask[irq] = mask_val; -} - void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { cpumask_t mask = CPU_MASK_NONE; @@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int if (irq < NR_IRQS) { irq_affinity[irq] = mask; + set_irq_info(irq, mask); irq_redir[irq] = (char) (redir & 0xff); } } - - -void move_irq(int irq) -{ - /* note - we hold desc->lock */ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - int redir = test_bit(irq, pending_irq_redir); - - if (unlikely(!desc->handler->set_affinity)) - return; - - if (!cpus_empty(pending_irq_cpumask[irq])) { - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - if (unlikely(!cpus_empty(tmp))) { - desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), - pending_irq_cpumask[irq]); - } - cpus_clear(pending_irq_cpumask[irq]); - } -} - - #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU diff -puN arch/x86_64/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/x86_64/Kconfig --- devel/arch/x86_64/Kconfig~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/x86_64/Kconfig 2005-07-08 23:11:22.000000000 -0700 @@ -444,6 +444,11 @@ config ISA_DMA_API bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + menu "Power management options" source kernel/power/Kconfig diff -puN arch/x86_64/kernel/io_apic.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree arch/x86_64/kernel/io_apic.c --- devel/arch/x86_64/kernel/io_apic.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/arch/x86_64/kernel/io_apic.c 2005-07-08 23:11:22.000000000 -0700 @@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR #define vector_to_irq(vector) (vector) #endif +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#ifdef CONFIG_SMP +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + set_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int entry->pin = pin; } -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} #define DO_ACTION(name,R,ACTION, FINAL) \ \ @@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(vo spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_ */ static void ack_edge_ioapic_irq(unsigned int irq) { + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic static void end_level_ioapic_irq (unsigned int irq) { + move_irq(irq); ack_APIC_irq(); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - unsigned long flags; - unsigned int dest; - - dest = cpu_mask_to_apicid(mask); - - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsig { int irq = vector_to_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (uns { int irq = vector_to_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsig unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } -#endif +#endif // CONFIG_SMP +#endif // CONFIG_PCI_MSI /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_e .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static struct hw_interrupt_type ioapic_level_type = { @@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_l .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; @@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) } } +#endif diff -puN drivers/pci/msi.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree drivers/pci/msi.c --- devel/drivers/pci/msi.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/drivers/pci/msi.c 2005-07-08 23:11:22.000000000 -0700 @@ -91,6 +91,7 @@ static void set_msi_affinity(unsigned in { struct msi_desc *entry; struct msg_address address; + unsigned int irq = vector; entry = (struct msi_desc *)msi_desc[vector]; if (!entry || !entry->dev) @@ -112,6 +113,7 @@ static void set_msi_affinity(unsigned in entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), address.lo_address.value); + set_native_irq_info(irq, cpu_mask); break; } case PCI_CAP_ID_MSIX: @@ -125,22 +127,13 @@ static void set_msi_affinity(unsigned in MSI_TARGET_CPU_SHIFT); entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); writel(address.lo_address.value, entry->mask_base + offset); + set_native_irq_info(irq, cpu_mask); break; } default: break; } } - -#ifdef CONFIG_IRQBALANCE -static inline void move_msi(int vector) -{ - if (!cpus_empty(pending_irq_balance_cpumask[vector])) { - set_msi_affinity(vector, pending_irq_balance_cpumask[vector]); - cpus_clear(pending_irq_balance_cpumask[vector]); - } -} -#endif /* CONFIG_IRQBALANCE */ #endif /* CONFIG_SMP */ static void mask_MSI_irq(unsigned int vector) @@ -191,13 +184,13 @@ static void shutdown_msi_irq(unsigned in static void end_msi_irq_wo_maskbit(unsigned int vector) { - move_msi(vector); + move_native_irq(vector); ack_APIC_irq(); } static void end_msi_irq_w_maskbit(unsigned int vector) { - move_msi(vector); + move_native_irq(vector); unmask_MSI_irq(vector); ack_APIC_irq(); } diff -puN drivers/pci/msi.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree drivers/pci/msi.h --- devel/drivers/pci/msi.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/drivers/pci/msi.h 2005-07-08 23:11:22.000000000 -0700 @@ -19,7 +19,6 @@ #define NR_HP_RESERVED_VECTORS 20 extern int vector_irq[NR_VECTORS]; -extern cpumask_t pending_irq_balance_cpumask[NR_IRQS]; extern void (*interrupt[NR_IRQS])(void); extern int pci_vector_resources(int last, int nr_released); @@ -29,10 +28,6 @@ extern int pci_vector_resources(int last #define set_msi_irq_affinity NULL #endif -#ifndef CONFIG_IRQBALANCE -static inline void move_msi(int vector) {} -#endif - /* * MSI-X Address Register */ diff -puN include/asm-ia64/hw_irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree include/asm-ia64/hw_irq.h --- devel/include/asm-ia64/hw_irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/include/asm-ia64/hw_irq.h 2005-07-08 23:11:22.000000000 -0700 @@ -117,13 +117,6 @@ __ia64_local_vector_to_irq (ia64_vector * and to obtain the irq descriptor for a given irq number. */ -/* Return a pointer to the irq descriptor for IRQ. */ -static inline irq_desc_t * -irq_descp (int irq) -{ - return irq_desc + irq; -} - /* Extract the IA-64 vector that corresponds to IRQ. */ static inline ia64_vector irq_to_vector (int irq) diff -puN include/asm-ia64/irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree include/asm-ia64/irq.h --- devel/include/asm-ia64/irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/include/asm-ia64/irq.h 2005-07-08 23:11:22.000000000 -0700 @@ -30,12 +30,6 @@ extern void disable_irq_nosync (unsigned extern void enable_irq (unsigned int); extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); -#ifdef CONFIG_SMP -extern void move_irq(int irq); -#else -#define move_irq(irq) -#endif - struct irqaction; struct pt_regs; int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); diff -puN include/linux/irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree include/linux/irq.h --- devel/include/linux/irq.h~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/include/linux/irq.h 2005-07-08 23:11:22.000000000 -0700 @@ -71,16 +71,139 @@ typedef struct irq_desc { unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irqs_unhandled; spinlock_t lock; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) + unsigned int move_irq; /* Flag need to re-target intr dest*/ +#endif } ____cacheline_aligned irq_desc_t; extern irq_desc_t irq_desc [NR_IRQS]; +/* Return a pointer to the irq descriptor for IRQ. */ +static inline irq_desc_t * +irq_descp (int irq) +{ + return irq_desc + irq; +} + #include /* the arch dependent stuff */ extern int setup_irq(unsigned int irq, struct irqaction * new); #ifdef CONFIG_GENERIC_HARDIRQS extern cpumask_t irq_affinity[NR_IRQS]; + +#ifdef CONFIG_SMP +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ + irq_affinity[irq] = mask; +} +#else +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ +} +#endif + +#ifdef CONFIG_SMP + +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +extern cpumask_t pending_irq_cpumask[NR_IRQS]; + +static inline void set_pending_irq(unsigned int irq, cpumask_t mask) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->move_irq = 1; + pending_irq_cpumask[irq] = mask; + spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline void +move_native_irq(int irq) +{ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (likely (!desc->move_irq)) + return; + + desc->move_irq = 0; + + if (likely(cpus_empty(pending_irq_cpumask[irq]))) + return; + + if (!desc->handler->set_affinity) + return; + + /* note - we hold the desc->lock */ + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is comming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + */ + if (unlikely(!cpus_empty(tmp))) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq,tmp); + desc->handler->enable(irq); + } + cpus_clear(pending_irq_cpumask[irq]); +} + +#ifdef CONFIG_PCI_MSI +/* + * Wonder why these are dummies? + * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() + * counter part after translating the vector to irq info. We need to perform + * this operation on the real irq, when we dont use vector, i.e when + * pci_use_vector() is false. + */ +static inline void move_irq(int irq) +{ +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ +} + +#else // CONFIG_PCI_MSI + +static inline void move_irq(int irq) +{ + move_native_irq(irq); +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} +#endif // CONFIG_PCI_MSI + +#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE + +#define move_irq(x) +#define move_native_irq(x) +#define set_pending_irq(x,y) +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} + +#endif // CONFIG_GENERIC_PENDING_IRQ + +#else // CONFIG_SMP + +#define move_irq(x) +#define move_native_irq(x) + +#endif // CONFIG_SMP + extern int no_irq_affinity; extern int noirqdebug_setup(char *str); diff -puN kernel/irq/manage.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree kernel/irq/manage.c --- devel/kernel/irq/manage.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/kernel/irq/manage.c 2005-07-08 23:11:22.000000000 -0700 @@ -18,6 +18,10 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; +#endif + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * diff -puN kernel/irq/proc.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree kernel/irq/proc.c --- devel/kernel/irq/proc.c~x86-x86_64-deferred-handling-of-writes-to-proc-irq-xx-smp_affinitypatch-added-to-mm-tree 2005-07-08 23:11:22.000000000 -0700 +++ devel-akpm/kernel/irq/proc.c 2005-07-08 23:11:22.000000000 -0700 @@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_d */ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; -void __attribute__((weak)) -proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) +#ifdef CONFIG_GENERIC_PENDING_IRQ +void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) +{ + /* + * Save these away for later use. Re-progam when the + * interrupt is pending + */ + set_pending_irq(irq, mask_val); +} +#else +void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) { irq_affinity[irq] = mask_val; irq_desc[irq].handler->set_affinity(irq, mask_val); } +#endif static int irq_affinity_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) _