diff -urN linux-2.5.21/Documentation/preempt-logging.txt linux/Documentation/preempt-logging.txt
--- linux-2.5.21/Documentation/preempt-logging.txt	Wed Dec 31 16:00:00 1969
+++ linux/Documentation/preempt-logging.txt	Thu Jun 13 15:32:07 2002
@@ -0,0 +1,27 @@
+Kernel preemption debugging tool
+--------------------------------
+
+CONFIG_PREEMPT_LOG adds a kernel preemption log, which can be very
+useful in debugging unprotected critical regions.  The log records the
+instruction address where the preemption occurred, the PID of the
+process that was preempted, and a timestamp.
+
+show_preempt_log() dumps the most recent 20 events, most recent first.
+Printed for each event are the PID of the preempted process, the number
+of milliseconds by which it preceded the most recent logged event, and
+the instruction pointer address at which it was preempted.  A call to
+show_preempt_log() is added to the kernel Oops message, and the log can
+also be viewed with SysRq-W.  (The addresses are repeated in a format
+that can be fed to ksymoops for decoding into symbols.)
+
+save_preempt_log() can be used to take a snapshot of the preemption log
+after an "interesting" event has taken place.  For example, I used this
+to catch the preemption problem that turned out to be the page fault
+trap handler being called with interrupts enabled, leading to corruption
+of the fault address register if preemption occurred before it was
+saved.  I added a call to save_preempt_log() in fault.c at the point
+where a process is about to be killed with SIGSEGV.  Sure enough, the
+preemption log showed a preemption on the first instruction of the trap
+handler whenever a process unexpectedly SEGVed.  (This problem was
+subsequently fixed in Linux, as it turned out to be a problem even
+without preemption.)
diff -urN linux-2.5.21/arch/i386/Config.help linux/arch/i386/Config.help
--- linux-2.5.21/arch/i386/Config.help	Sat Jun 8 22:27:21 2002
+++ linux/arch/i386/Config.help	Thu Jun 13 15:32:07 2002
@@ -937,6 +937,16 @@
   of the BUG call as well as the EIP and oops trace.  This aids
   debugging but costs about 70-100K of memory.
 
+CONFIG_PREEMPT_LOG
+  Say Y here to include a log of kernel preemption events.  This can
+  be very helpful when debugging problems caused by preemption taking
+  place in unprotected critical regions.
+
+  See Documentation/preempt-logging.txt for more information.
+
+  If you are debugging kernel preemption and think a log of preemption
+  events would be useful, say Y here.  Otherwise say N.
+
 CONFIG_DEBUG_OBSOLETE
   Say Y here if you want to reduce the chances of the tree compiling,
   and are prepared to dig into driver internals to fix compile errors.
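To make the save_preempt_log() usage described in preempt-logging.txt above
concrete, the fault.c hook could look roughly like the sketch below.  This
snippet is not part of the patch: note_unexpected_segv() is a made-up name,
and the exact context inside do_page_fault() will differ; the real change is
just a save_preempt_log() call at the point where the SIGSEGV is about to be
delivered.  save_preempt_log() compiles away to a no-op when
CONFIG_PREEMPT_LOG is not set, so the hook is harmless in other
configurations.

	/*
	 * Illustrative sketch only -- not part of the patch.  Freeze the
	 * preemption log the moment an unexpected user-mode SIGSEGV is
	 * detected, so that a later show_preempt_log() (from the Oops
	 * path or SysRq-W) shows the preemptions leading up to the fault.
	 */
	#include <linux/sched.h>
	#include <linux/spinlock.h>	/* save_preempt_log() */

	static void note_unexpected_segv(struct task_struct *tsk,
					 unsigned long address)
	{
		/* Snapshot the log before further preemptions overwrite it */
		save_preempt_log();

		printk(KERN_DEBUG "segv: pid %d faulted at %08lx, "
		       "preempt log saved\n", tsk->pid, address);
	}
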
diff -urN linux-2.5.21/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.5.21/arch/i386/config.in	Sat Jun 8 22:28:47 2002
+++ linux/arch/i386/config.in	Thu Jun 13 15:32:07 2002
@@ -415,6 +415,7 @@
    bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
    bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
    bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+   dep_bool '  Preemption logging' CONFIG_PREEMPT_LOG $CONFIG_PREEMPT
    if [ "$CONFIG_HIGHMEM" = "y" ]; then
       bool '  Highmem debugging' CONFIG_DEBUG_HIGHMEM
    fi
diff -urN linux-2.5.21/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linux-2.5.21/arch/i386/kernel/entry.S	Sat Jun 8 22:28:19 2002
+++ linux/arch/i386/kernel/entry.S	Thu Jun 13 15:32:25 2002
@@ -238,6 +238,12 @@
 	jnz restore_all
 	movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebx)
 	sti
+#ifdef CONFIG_PREEMPT_LOG
+	movl EIP(%esp),%ecx
+	pushl %ecx
+	call log_preemption
+	addl $4,%esp
+#endif
 	call schedule
 	movl $0,TI_PRE_COUNT(%ebx)
 	jmp restore_all
diff -urN linux-2.5.21/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux-2.5.21/arch/i386/kernel/traps.c	Sat Jun 8 22:27:43 2002
+++ linux/arch/i386/kernel/traps.c	Thu Jun 13 15:32:07 2002
@@ -239,6 +239,8 @@
 		}
 	}
 	printk("\n");
+
+	show_preempt_log();
 }
 
 static void handle_BUG(struct pt_regs *regs)
diff -urN linux-2.5.21/drivers/char/sysrq.c linux/drivers/char/sysrq.c
--- linux-2.5.21/drivers/char/sysrq.c	Sat Jun 8 22:28:44 2002
+++ linux/drivers/char/sysrq.c	Thu Jun 13 15:32:07 2002
@@ -279,6 +279,17 @@
 	action_msg:	"Show Memory",
 };
 
+static void sysrq_handle_preempt_log(int key, struct pt_regs *pt_regs,
+				     struct kbd_struct *kbd, struct tty_struct *tty) {
+	show_preempt_log();
+}
+
+static struct sysrq_key_op sysrq_preempt_log_op = {
+	handler:	sysrq_handle_preempt_log,
+	help_msg:	"shoWpreemptlog",
+	action_msg:	"Show Preempt Log",
+};
+
 
 
 /* SHOW SYSRQ HANDLERS BLOCK */
@@ -385,6 +396,10 @@
 /* t */	&sysrq_showstate_op,
 /* u */	&sysrq_mountro_op,
 /* v */	NULL,
-/* w */	NULL,
+#ifdef CONFIG_PREEMPT_LOG
+/* w */	&sysrq_preempt_log_op,
+#else
+/* w */	NULL,
+#endif
 /* x */	NULL,
 /* w */	NULL,
diff -urN linux-2.5.21/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.5.21/include/linux/sched.h	Sat Jun 8 22:27:21 2002
+++ linux/include/linux/sched.h	Thu Jun 13 15:32:45 2002
@@ -155,7 +155,6 @@
 extern void migration_init(void);
 extern unsigned long cache_decay_ticks;
 
-
 #define MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long FASTCALL(schedule_timeout(signed long timeout));
 asmlinkage void schedule(void);
diff -urN linux-2.5.21/include/linux/spinlock.h linux/include/linux/spinlock.h
--- linux-2.5.21/include/linux/spinlock.h	Sat Jun 8 22:30:01 2002
+++ linux/include/linux/spinlock.h	Thu Jun 13 15:33:19 2002
@@ -159,7 +159,19 @@
 
 #ifdef CONFIG_PREEMPT
 
+#ifdef CONFIG_PREEMPT_LOG
+asmlinkage void preempt_schedule(void *);
+#define preempt_schedule_and_log(ip) preempt_schedule(ip)
+extern void show_preempt_log(void);
+extern void save_preempt_log(void);
+extern void log_preemption(void *);
+#else
 asmlinkage void preempt_schedule(void);
+#define preempt_schedule_and_log(ip) preempt_schedule()
+#define show_preempt_log() do { } while(0)
+#define save_preempt_log() do { } while(0)
+#define log_preemption(p) do { } while(0)
+#endif
 
 #define preempt_get_count() (current_thread_info()->preempt_count)
 
@@ -180,7 +192,7 @@
 	--current_thread_info()->preempt_count; \
 	barrier(); \
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
-		preempt_schedule(); \
+		preempt_schedule_and_log(current_text_addr()); \
 } while (0)
 
 #define spin_lock(lock) \
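For readers who would rather not decode the entry.S hunk above: with
CONFIG_PREEMPT_LOG enabled, the interrupt-return preemption path behaves
roughly like the C sketch below (illustrative only; the function name is
invented and the real code is the assembly in entry.S).  regs->eip is the
saved address of the interrupted instruction, which is what ends up in the
log; the preempt_enable() path in spinlock.h records the analogous address
via current_text_addr().

	/*
	 * Illustrative C rendering of the entry.S change -- not part of
	 * the patch.  On return from an interrupt, with TIF_NEED_RESCHED
	 * set and preemption not otherwise disabled, the kernel marks
	 * itself PREEMPT_ACTIVE, logs where it is about to preempt, and
	 * schedules.
	 */
	static void preempt_from_interrupt_return(struct pt_regs *regs)
	{
		current_thread_info()->preempt_count = PREEMPT_ACTIVE;
		local_irq_enable();			/* the "sti" */
		log_preemption((void *)regs->eip);	/* EIP(%esp) in the asm */
		schedule();
		current_thread_info()->preempt_count = 0;
	}
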
diff -urN linux-2.5.21/kernel/sched.c linux/kernel/sched.c
--- linux-2.5.21/kernel/sched.c	Sat Jun 8 22:28:13 2002
+++ linux/kernel/sched.c	Thu Jun 13 15:32:07 2002
@@ -858,17 +858,154 @@
  * off of preempt_enable.  Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
+#ifdef CONFIG_PREEMPT_LOG
+asmlinkage void preempt_schedule(void *ip)
+#else
 asmlinkage void preempt_schedule(void)
+#endif
 {
 	struct thread_info *ti = current_thread_info();
 
 	if (unlikely(ti->preempt_count))
 		return;
 
+	log_preemption(ip);
+
 	ti->preempt_count = PREEMPT_ACTIVE;
 	schedule();
 	ti->preempt_count = 0;
 }
+
+#ifdef CONFIG_PREEMPT_LOG
+
+struct preempt_entry {
+	struct timeval tv;
+	void *ip;
+	pid_t pid;
+};
+
+#define PREEMPT_LOG_LEN 100
+
+static struct preempt_entry preempt_log[PREEMPT_LOG_LEN] = {{{ 0, 0 }, 0, 0 }};
+static struct preempt_entry saved_preempt_log[PREEMPT_LOG_LEN] =
+	{{{ 0, 0 }, 0, 0 }};
+
+static int pli = -1;
+static int saved_pli = 0;
+static int preempt_log_saved = 0;
+static spinlock_t preempt_log_lock = SPIN_LOCK_UNLOCKED;
+
+void log_preemption(void *ip)
+{
+	unsigned long flags;
+
+	if (!ip)
+		return;
+
+	/* Avoid unwanted preemption on unlock */
+	preempt_disable();
+
+	spin_lock_irqsave(&preempt_log_lock, flags);
+
+	/*
+	 * Compress the log by eliminating consecutive identical
+	 * preempted addresses.  This prevents the log from filling
+	 * up with duplicate addresses from default_idle.
+	 */
+	if (pli >= 0 && ip == preempt_log[pli].ip)
+		goto out;
+
+	if (++pli == PREEMPT_LOG_LEN)
+		pli = 0;
+	preempt_log[pli].pid = current->pid;
+	preempt_log[pli].ip = ip;
+	do_gettimeofday(&preempt_log[pli].tv);
+
+out:
+	spin_unlock_irqrestore(&preempt_log_lock, flags);
+	preempt_enable_no_resched();
+}
+
+void save_preempt_log(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&preempt_log_lock, flags);
+	memcpy(saved_preempt_log, preempt_log, sizeof(saved_preempt_log));
+	saved_pli = pli;
+	spin_unlock_irqrestore(&preempt_log_lock, flags);
+	preempt_log_saved = 1;
+}
+
+void show_preempt_log(void)
+{
+	int i;
+	struct timeval *t0;
+	struct timeval now;
+	long recent;
+
+	if (!preempt_log_saved)
+		save_preempt_log();
+
+	do_gettimeofday(&now);
+
+	t0 = &saved_preempt_log[saved_pli].tv;
+
+	recent = ((now.tv_sec - t0->tv_sec) * 1000000 +
+		  (now.tv_usec - t0->tv_usec)) / 1000;
+	printk("Most recently logged preemption event happened %ldms ago\n",
+	       recent);
+
+	printk("  PID   TIME  IP\n");
+
+	for (i = 0; i < 20; i++) {
+		int n = saved_pli - i;
+		long msecs;
+		struct timeval *t;
+
+		if (n < 0)
+			n += PREEMPT_LOG_LEN;
+
+		t = &saved_preempt_log[n].tv;
+
+		msecs = ((t0->tv_sec - t->tv_sec) * 1000000 +
+			 (t0->tv_usec - t->tv_usec)) / 1000;
+
+		printk("%5d", saved_preempt_log[n].pid);
+		printk(" %6ld", msecs);
+#if (BITS_PER_LONG == 32)
+		printk(" [<%08lx>]\n",
+		       (unsigned long)(saved_preempt_log[n].ip));
+#else
+		printk(" [<%016lx>]\n",
+		       (unsigned long)(saved_preempt_log[n].ip));
+#endif
+	}
+
+	/*
+	 * Repeat the addresses while pretending to be a trace,
+	 * so that the existing version of ksymoops will helpfully
+	 * decode them.
+	 */
+	printk("Trace:");
+	for (i = 0; i < 20; i++) {
+		int n = saved_pli - i;
+
+		if (n < 0)
+			n += PREEMPT_LOG_LEN;
+
+#if (BITS_PER_LONG == 32)
+		printk(" [<%08lx>]", (unsigned long)(saved_preempt_log[n].ip));
+#else
+		printk(" [<%016lx>]", (unsigned long)(saved_preempt_log[n].ip));
+#endif
+	}
+	printk("\n");
+
+	preempt_log_saved = 0;
+}
+#endif /* CONFIG_PREEMPT_LOG */
+
 #endif /* CONFIG_PREEMPT */
 
 /*