Patch from Dave Hansen The new lost-tick detection code can be relatively uninformative when something using the common_interrupt->do_IRQ path keeps interrupts disabled for a long time. This patch will record the last interrupt on the current CPU, and spit it out along with the stack trace. I ignore the timer interrupt, because it is always running when this detection occurs, and would overwrite the previous interrupt that took a long time. You can now turn this on with a boot-time option with the same name as x86-64: "report_lost_ticks". I don't like the default of 5, so I let it take a plain report_lost_ticks, or report_lost_ticks=100, if you want. Warning! Detected 4094446 micro-second gap between interrupts. Compensating for 4093 lost ticks. Call Trace: [] handle_IRQ_event+0x28/0x50 [] do_IRQ+0xa0/0x10c [] common_interrupt+0x43/0x58 Last run common irq: 24: eth2 i386/kernel/irq.c | 9 +++++++++ i386/kernel/time.c | 48 +++++++++++++++++++++++++++++++++++++----------- asm-i386/irq.h | 3 +++ 3 files changed, 49 insertions(+), 11 deletions(-) diff -puN arch/i386/kernel/irq.c~report-lost-ticks arch/i386/kernel/irq.c --- 25-power4/arch/i386/kernel/irq.c~report-lost-ticks 2003-02-18 21:46:30.000000000 -0800 +++ 25-power4-akpm/arch/i386/kernel/irq.c 2003-02-18 21:46:30.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,11 @@ irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; +/* + * Used to determine the culprits who disabled interrupts for a long time + */ +DEFINE_PER_CPU(int, last_do_IRQ_interrupt) = { -1 }; + static void register_irq_proc (unsigned int irq); /* @@ -329,6 +335,9 @@ asmlinkage unsigned int do_IRQ(struct pt struct irqaction * action; unsigned int status; + if (irq) /* don't count the timer */ + __get_cpu_var(last_do_IRQ_interrupt) = irq; + irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW diff -puN arch/i386/kernel/time.c~report-lost-ticks arch/i386/kernel/time.c --- 25-power4/arch/i386/kernel/time.c~report-lost-ticks 2003-02-18 21:46:30.000000000 -0800 +++ 25-power4-akpm/arch/i386/kernel/time.c 2003-02-18 21:46:30.000000000 -0800 @@ -267,6 +267,24 @@ static inline void do_timer_interrupt(in #endif } +static int report_lost_ticks; +int __init report_lost_ticks_setup(char *str) +{ + char* numstr; + + if (str[0] == '=') { + numstr = &str[1]; + return get_option(&numstr,&report_lost_ticks) ? 1 : 0; + } else if (str[0] == '\0') { + report_lost_ticks = 5; + return 1; + } + + return 0; +} + +__setup("report_lost_ticks", report_lost_ticks_setup); + /* * Lost tick detection and compensation */ @@ -274,18 +292,15 @@ static inline void detect_lost_tick(void { /* read time since last interrupt */ unsigned long delta = timer->get_offset(); - static unsigned long dbg_print; /* check if delta is greater then two ticks */ - if(delta >= 2*(1000000/HZ)){ + if (unlikely((delta >= 2*(1000000/HZ)))) { + + if (report_lost_ticks > 0) { + struct irqaction *action; + int last_irq; - /* - * only print debug info first 5 times - */ - /* - * AKPM: disable this for now; it's nice, but irritating. - */ - if (0 && dbg_print < 5) { + last_irq = __get_cpu_var(last_do_IRQ_interrupt); printk(KERN_WARNING "\nWarning! Detected %lu " "micro-second gap between interrupts.\n", delta); @@ -293,13 +308,24 @@ static inline void detect_lost_tick(void "ticks.\n", delta/(1000000/HZ)-1); dump_stack(); - dbg_print++; + + if (last_irq >= 0 && + (action = irq_desc[last_irq].action)) { + printk(KERN_WARNING " Last run common " + "irq: %d: %s", + last_irq, action->name); + for (action = action->next; action; + action = action->next) + printk(", %s", action->name); + printk("\n"); + } + + report_lost_ticks--; } /* calculate number of missed ticks */ delta = delta/(1000000/HZ)-1; jiffies += delta; } - } /* diff -puN include/asm-i386/irq.h~report-lost-ticks include/asm-i386/irq.h --- 25-power4/include/asm-i386/irq.h~report-lost-ticks 2003-02-18 21:46:30.000000000 -0800 +++ 25-power4-akpm/include/asm-i386/irq.h 2003-02-18 21:46:30.000000000 -0800 @@ -12,6 +12,8 @@ #include #include +#include + /* include comes from machine specific directory */ #include "irq_vectors.h" @@ -24,6 +26,7 @@ extern void disable_irq(unsigned int); extern void disable_irq_nosync(unsigned int); extern void enable_irq(unsigned int); extern void release_x86_irqs(struct task_struct *); +DECLARE_PER_CPU(int, last_do_IRQ_interrupt); #ifdef CONFIG_X86_LOCAL_APIC #define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ _