From: Keith Mannthey

The following is a patch to fix the inconsistent use of the function
set_ioapic_affinity.  In the current kernel it is unclear whether the
value being passed to the function is a cpu mask or a valid apic id.
irq_affinity_write_proc passes in a cpu mask, but the kirqd thread
passes in logical apic ids.  In flat apic mode this is not an issue,
because the cpu mask is identical to the corresponding logical apic
value; in clustered apic mode, however, the cpu mask is very different
from the logical apic id.

This is an attempt to do the right thing for clustered apics.  I
clarify that the value being passed to set_ioapic_affinity is a cpu
mask, not an apic id; set_ioapic_affinity now does the conversion to
logical apic ids itself.  Since many cpu masks don't map to valid apic
ids in clustered apic mode, TARGET_CPUS is used as a default value when
such a situation occurs.  I think this is a good step in making
irq_affinity clustered-apic safe.  (For illustration, a small
stand-alone sketch of the conversion is appended after the patch.)

 25-akpm/arch/i386/kernel/io_apic.c                |   29 ++++++++---------
 25-akpm/include/asm-i386/mach-bigsmp/mach_apic.h  |   37 +++++++++++++++++++++-
 25-akpm/include/asm-i386/mach-default/mach_apic.h |    5 ++
 25-akpm/include/asm-i386/mach-numaq/mach_apic.h   |   34 ++++++++++++++++++++
 25-akpm/include/asm-i386/mach-summit/mach_apic.h  |   37 +++++++++++++++++++++-
 25-akpm/include/asm-i386/mach-visws/mach_apic.h   |    4 ++
 6 files changed, 129 insertions(+), 17 deletions(-)

diff -puN arch/i386/kernel/io_apic.c~clustered-io_apic-fix arch/i386/kernel/io_apic.c
--- 25/arch/i386/kernel/io_apic.c~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/arch/i386/kernel/io_apic.c	Mon May  5 17:09:40 2003
@@ -240,22 +240,22 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+static void set_ioapic_affinity (unsigned int irq, unsigned long cpu_mask)
 {
 	unsigned long flags;
 	int pin;
 	struct irq_pin_list *entry = irq_2_pin + irq;
-
-	/*
-	 * Only the first 8 bits are valid.
-	 */
-	mask = mask << 24;
+	unsigned int apicid_value;
+
+	apicid_value = cpu_mask_to_apicid(cpu_mask);
+	/* Prepare to do the io_apic_write */
+	apicid_value = apicid_value << 24;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	for (;;) {
 		pin = entry->pin;
 		if (pin == -1)
 			break;
-		io_apic_write(entry->apic, 0x10 + 1 + pin*2, mask);
+		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
 		if (!entry->next)
 			break;
 		entry = irq_2_pin + entry->next;
@@ -279,7 +279,7 @@ static void set_ioapic_affinity (unsigne
 
 extern unsigned long irq_affinity[NR_IRQS];
 
-static int __cacheline_aligned pending_irq_balance_apicid[NR_IRQS];
+static int __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
 
 #define IRQBALANCE_CHECK_ARCH -999
 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
@@ -356,7 +356,7 @@ static inline void balance_irq(int cpu, 
 		unsigned long flags;
 
 		spin_lock_irqsave(&desc->lock, flags);
-		pending_irq_balance_apicid[irq]=cpu_to_logical_apicid(new_cpu);
+		pending_irq_balance_cpumask[irq] = 1 << new_cpu;
 		spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
@@ -553,8 +553,7 @@ tryanotherirq:
 				selected_irq, min_loaded);
 	/* mark for change destination */
 	spin_lock_irqsave(&desc->lock, flags);
-	pending_irq_balance_apicid[selected_irq] =
-			cpu_to_logical_apicid(min_loaded);
+	pending_irq_balance_cpumask[selected_irq] = 1 << min_loaded;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	/* Since we made a change, come back sooner to
 	 * check for more variation.
@@ -586,7 +585,7 @@ int balanced_irq(void *unused)
 
 	/* push everything to CPU 0 to give us a starting point.
 	 */
 	for (i = 0 ; i < NR_IRQS ; i++)
-		pending_irq_balance_apicid[i] = cpu_to_logical_apicid(0);
+		pending_irq_balance_cpumask[i] = 1;
 
 repeat:
 	set_current_state(TASK_INTERRUPTIBLE);
@@ -663,9 +662,9 @@ static void set_ioapic_affinity (unsigne
 static inline void move_irq(int irq)
 {
 	/* note - we hold the desc->lock */
-	if (unlikely(pending_irq_balance_apicid[irq])) {
-		set_ioapic_affinity(irq, pending_irq_balance_apicid[irq]);
-		pending_irq_balance_apicid[irq] = 0;
+	if (unlikely(pending_irq_balance_cpumask[irq])) {
+		set_ioapic_affinity(irq, pending_irq_balance_cpumask[irq]);
+		pending_irq_balance_cpumask[irq] = 0;
 	}
 }
 
diff -puN include/asm-i386/mach-bigsmp/mach_apic.h~clustered-io_apic-fix include/asm-i386/mach-bigsmp/mach_apic.h
--- 25/include/asm-i386/mach-bigsmp/mach_apic.h~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/include/asm-i386/mach-bigsmp/mach_apic.h	Mon May  5 17:08:18 2003
@@ -34,10 +34,12 @@ static inline unsigned long check_apicid
 	return 0;
 }
 static inline unsigned long check_apicid_present(int bit)
-{ 
+{
 	return (phys_cpu_present_map & (1 << bit));
 }
 
+#define apicid_cluster(apicid) (apicid & 0xF0)
+
 static inline unsigned long calculate_ldr(unsigned long old)
 {
 	unsigned long id;
@@ -134,4 +136,37 @@ static inline unsigned get_apic_id(unsig
 
 #define GET_APIC_ID(x)	get_apic_id(x)
 
+static inline unsigned int cpu_mask_to_apicid (unsigned long cpumask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid;
+
+	num_bits_set = hweight32(cpumask);
+	/* Return id to all */
+	if (num_bits_set == 32)
+		return (int) 0xFF;
+	/*
+	 * The cpus in the mask must all be on the same apicid cluster.  If
+	 * they are not, return the default value of TARGET_CPUS.
+	 */
+	cpu = ffs(cpumask)-1;
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask & (1 << cpu)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n",__FUNCTION__);
+				return TARGET_CPUS;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 #endif /* __ASM_MACH_APIC_H */
diff -puN include/asm-i386/mach-default/mach_apic.h~clustered-io_apic-fix include/asm-i386/mach-default/mach_apic.h
--- 25/include/asm-i386/mach-default/mach_apic.h~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/include/asm-i386/mach-default/mach_apic.h	Mon May  5 17:07:25 2003
@@ -120,4 +120,9 @@ static inline int apic_id_registered(voi
 		&phys_cpu_present_map));
 }
 
+static inline unsigned int cpu_mask_to_apicid (unsigned long cpumask)
+{
+	return cpumask;
+}
+
 #endif /* __ASM_MACH_APIC_H */
diff -puN include/asm-i386/mach-numaq/mach_apic.h~clustered-io_apic-fix include/asm-i386/mach-numaq/mach_apic.h
--- 25/include/asm-i386/mach-numaq/mach_apic.h~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/include/asm-i386/mach-numaq/mach_apic.h	Mon May  5 17:07:25 2003
@@ -17,6 +17,7 @@
 #define APIC_BROADCAST_ID      0x0F
 #define check_apicid_used(bitmap, apicid) ((bitmap) & (1 << (apicid)))
 #define check_apicid_present(bit) (phys_cpu_present_map & (1 << bit))
+#define apicid_cluster(apicid) (apicid & 0xF0)
 
 static inline int apic_id_registered(void)
 {
@@ -115,4 +116,37 @@ static inline unsigned get_apic_id(unsig
 
 #define GET_APIC_ID(x)	get_apic_id(x)
 
+static inline unsigned int cpu_mask_to_apicid (unsigned long cpumask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid;
+
+	num_bits_set = hweight32(cpumask);
+	/* Return id to all */
+	if (num_bits_set == 32)
+		return (int) 0xFF;
+	/*
+	 * The cpus in the mask must all be on the same apicid cluster.  If
+	 * they are not, return the default value of TARGET_CPUS.
+	 */
+	cpu = ffs(cpumask)-1;
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask & (1 << cpu)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n",__FUNCTION__);
+				return TARGET_CPUS;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 #endif /* __ASM_MACH_APIC_H */
diff -puN include/asm-i386/mach-summit/mach_apic.h~clustered-io_apic-fix include/asm-i386/mach-summit/mach_apic.h
--- 25/include/asm-i386/mach-summit/mach_apic.h~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/include/asm-i386/mach-summit/mach_apic.h	Mon May  5 17:08:49 2003
@@ -34,7 +34,7 @@ static inline unsigned long target_cpus(
 
 #define APIC_BROADCAST_ID     (0x0F)
 
 static inline unsigned long check_apicid_used(unsigned long bitmap, int apicid)
-{ 
+{
 	return (x86_summit ? 0 : (bitmap & (1 << apicid)));
 }
@@ -44,6 +44,8 @@ static inline unsigned long check_apicid
 	return (x86_summit ? 1 : (phys_cpu_present_map & (1 << bit)));
 }
 
+#define apicid_cluster(apicid) (apicid & 0xF0)
+
 extern u8 bios_cpu_apicid[];
 
 static inline void init_apic_ldr(void)
@@ -142,4 +144,37 @@ static inline unsigned get_apic_id(unsig
 
 #define GET_APIC_ID(x)	get_apic_id(x)
 
+static inline unsigned int cpu_mask_to_apicid (unsigned long cpumask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid;
+
+	num_bits_set = hweight32(cpumask);
+	/* Return id to all */
+	if (num_bits_set == 32)
+		return (int) 0xFF;
+	/*
+	 * The cpus in the mask must all be on the same apicid cluster.  If
+	 * they are not, return the default value of TARGET_CPUS.
+	 */
+	cpu = ffs(cpumask)-1;
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask & (1 << cpu)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n",__FUNCTION__);
+				return TARGET_CPUS;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 #endif /* __ASM_MACH_APIC_H */
diff -puN include/asm-i386/mach-visws/mach_apic.h~clustered-io_apic-fix include/asm-i386/mach-visws/mach_apic.h
--- 25/include/asm-i386/mach-visws/mach_apic.h~clustered-io_apic-fix	Mon May  5 17:07:25 2003
+++ 25-akpm/include/asm-i386/mach-visws/mach_apic.h	Mon May  5 17:07:25 2003
@@ -77,4 +77,8 @@ static inline int check_phys_apicid_pres
 	return test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map);
 }
 
+static inline unsigned int cpu_mask_to_apicid (unsigned long cpumask)
+{
+	return cpumask;
+}
 #endif /* __ASM_MACH_APIC_H */
_
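
For anyone who wants to poke at the conversion outside the kernel, here
is a minimal stand-alone C sketch of the clustered cpu_mask_to_apicid()
logic.  The cpu_to_logical_apicid() mapping below (four cpus per
cluster, one LDR bit per cpu) and the TARGET_CPUS value are made-up
stand-ins for illustration only; the real ones differ per
subarchitecture, and the full-mask broadcast case (0xFF) is omitted.

	#include <stdio.h>

	#define TARGET_CPUS		0x0f	/* stand-in default destination */
	#define apicid_cluster(apicid)	((apicid) & 0xF0)	/* as in the patch */

	/* Made-up mapping: cpu N sits in cluster N/4 and owns LDR bit N%4. */
	static unsigned int cpu_to_logical_apicid(int cpu)
	{
		return ((cpu / 4) << 4) | (1 << (cpu % 4));
	}

	static unsigned int cpu_mask_to_apicid(unsigned long cpumask)
	{
		unsigned int apicid = 0;
		int cpu;

		for (cpu = 0; cpu < 32; cpu++) {
			unsigned int new_apicid;

			if (!(cpumask & (1UL << cpu)))
				continue;
			new_apicid = cpu_to_logical_apicid(cpu);
			/* Every cpu in the mask must share one apicid
			 * cluster; otherwise fall back to the default. */
			if (apicid && apicid_cluster(apicid) !=
				      apicid_cluster(new_apicid))
				return TARGET_CPUS;
			apicid |= new_apicid;
		}
		return apicid;
	}

	int main(void)
	{
		/* cpus 0-2 share cluster 0: the mask ORs into apicid 0x07 */
		printf("mask 0x07 -> apicid 0x%02x\n", cpu_mask_to_apicid(0x07));
		/* cpus 3 and 4 straddle clusters 0 and 1: not a valid mask */
		printf("mask 0x18 -> apicid 0x%02x\n", cpu_mask_to_apicid(0x18));
		return 0;
	}

Running it prints 0x07 for the first mask and the TARGET_CPUS fallback
(0x0f) for the second, which is the behavior the patch gives clustered
boxes: valid masks are collapsed into a logical apic id, and masks that
span clusters fall back to the default destination.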