From: Russell Miller A BUG or an oops will often leave a machine in a useless state. There is no way to remotely recover the machine from that state. The patch adds a /proc/sys/kernel/panic_on_oops sysctl which, when set, will cause the x86 kernel to call panic() at the end of the oops handler. If the user has also set /proc/sys/kernel/panic then a reboot will occur. The implementation will try to sleep for a while before panicking so the oops info has a chance of hitting the logs. The implementation is designed so that other architectures can easily do this in their oops handlers. Documentation/sysctl/kernel.txt | 12 ++++++++++++ arch/i386/kernel/traps.c | 9 +++++++++ include/linux/kernel.h | 1 + include/linux/sysctl.h | 1 + kernel/panic.c | 7 +++---- kernel/sysctl.c | 2 ++ 6 files changed, 28 insertions(+), 4 deletions(-) diff -puN arch/i386/kernel/traps.c~panic-on-oops arch/i386/kernel/traps.c --- 25/arch/i386/kernel/traps.c~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 25-akpm/arch/i386/kernel/traps.c 2003-04-02 21:57:44.000000000 -0800 @@ -302,6 +302,15 @@ void die(const char * str, struct pt_reg show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(5 * HZ); + panic("Fatal exception"); + } do_exit(SIGSEGV); } diff -puN include/linux/kernel.h~panic-on-oops include/linux/kernel.h --- 25/include/linux/kernel.h~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 25-akpm/include/linux/kernel.h 2003-04-02 21:50:15.000000000 -0800 @@ -104,6 +104,7 @@ static inline void console_verbose(void) extern void bust_spinlocks(int yes); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ +extern int panic_on_oops; extern int tainted; extern const char *print_tainted(void); diff -puN 
include/linux/sysctl.h~panic-on-oops include/linux/sysctl.h --- 25/include/linux/sysctl.h~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 25-akpm/include/linux/sysctl.h 2003-04-02 21:50:15.000000000 -0800 @@ -130,6 +130,7 @@ enum KERN_CADPID=54, /* int: PID of the process to notify on CAD */ KERN_PIDMAX=55, /* int: PID # limit */ KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ + KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ }; diff -puN kernel/panic.c~panic-on-oops kernel/panic.c --- 25/kernel/panic.c~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 25-akpm/kernel/panic.c 2003-04-02 21:50:15.000000000 -0800 @@ -20,6 +20,8 @@ asmlinkage void sys_sync(void); /* it's really int */ int panic_timeout; +int panic_on_oops; +int tainted; struct notifier_block *panic_notifier_list; @@ -28,7 +30,6 @@ static int __init panic_setup(char *str) panic_timeout = simple_strtoul(str, NULL, 0); return 1; } - __setup("panic=", panic_setup); /** @@ -51,7 +52,7 @@ NORET_TYPE void panic(const char * fmt, bust_spinlocks(1); va_start(args, fmt); - vsprintf(buf, fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic: %s\n",buf); if (in_interrupt()) @@ -123,5 +124,3 @@ const char *print_tainted() snprintf(buf, sizeof(buf), "Not tainted"); return(buf); } - -int tainted = 0; diff -puN kernel/sysctl.c~panic-on-oops kernel/sysctl.c --- 25/kernel/sysctl.c~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 25-akpm/kernel/sysctl.c 2003-04-02 21:50:15.000000000 -0800 @@ -275,6 +275,8 @@ static ctl_table kern_table[] = { #endif {KERN_PIDMAX, "pid_max", &pid_max, sizeof (int), 0600, NULL, &proc_dointvec}, + {KERN_PANIC_ON_OOPS,"panic_on_oops", + &panic_on_oops,sizeof(int),0644,NULL,&proc_dointvec}, {0} }; diff -puN Documentation/sysctl/kernel.txt~panic-on-oops Documentation/sysctl/kernel.txt --- 25/Documentation/sysctl/kernel.txt~panic-on-oops 2003-04-02 21:50:15.000000000 -0800 +++ 
25-akpm/Documentation/sysctl/kernel.txt 2003-04-02 21:50:15.000000000 -0800 @@ -204,6 +204,18 @@ software watchdog, the recommended setti ============================================================== +panic_on_oops: + +Controls the kernel's behaviour when an oops or BUG is encountered. + +0: try to continue operation + +1: delay a few seconds (to give klogd time to record the oops output) and + then panic. If the `panic' sysctl is also non-zero then the machine will + be rebooted. + +============================================================== + pid_max: PID allocation wrap value. When the kenrel's next PID value _