diff -urNp --exclude CVS --exclude BitKeeper x-ref/Documentation/Configure.help x/Documentation/Configure.help --- x-ref/Documentation/Configure.help 2003-06-07 16:06:27.000000000 +0200 +++ x/Documentation/Configure.help 2003-06-07 16:06:30.000000000 +0200 @@ -21104,6 +21104,132 @@ CONFIG_KMSGDUMP_SAFE never made because we consider that the user knows what stands on his diskettes. If unsure, just say "Y". +Kernel debugging support +CONFIG_KERNEL_DEBUGGING + Shows low level kernel tracing, debugging and general hacking tools. + Mere mortals say N. + +Debug kernel stack overflows +CONFIG_DEBUG_KSTACK + If you see "kernel stack corruption. Aiee" messages, and a kernel + hacker told you to 'switch on kernel stack debugging', then this + is the right option =B-) + Do 'make clean' after changing this option! + For normal systems, this option adds noticeable overhead, so say N. + +Kernel Stack Meter +CONFIG_KSTACK_METER + With this option set, the kernel will log the minimum stack left + following boot of the machine, and the function that was executing + when the machine reached that value. This is useful for determining + how much stack the kernel needs. It also allow us to detect if there + is some piece of code that could be optimized to run without eating + a lot of stack. To see the current values: + `cat /proc/sys/debug/kstack-meter' + To reinitialize the counter to default: + `echo -1 0 >/proc/sys/debug/kstack-meter' + The first integer is the minimum stack size left. The second is the + function that was running when that condition was reached. + For normal systems, this option adds noticeable overhead, so say N. + + With this option enabled the IA32 NMI watchdog will be disabled. + +Kernel stack overflow threshold +CONFIG_KSTACK_THRESHOLD + If the stack has less bytes than this left, assume you are headed for an + overflow. 
+ +Detect software lockups +CONFIG_DEBUG_SOFTLOCKUP + If you see strange lockups and a kernel hacker told you to 'switch + on software lockup detection', then this is the right option =B-) + Do 'make clean' after changing this option! + For normal systems, this option adds noticeable overhead, so say N. + +Deadlock threshold +CONFIG_SOFTLOCKUP_THRESHOLD + The number of procedure calls a process can make without going + through schedule. Any process that does more calls than this number + is "looping". Alas it does not catch inline procedure calls. + +Enable kernel tracer +CONFIG_TRACE + For kernel hackers who want to know where the path of execution goes + and how much time the kernel spends in the various procedures. The + trace is stored in /proc/trace (say Y to "/proc filesystem support"!) + and in order to read it, you need the ktrace program, see + scripts/ktrace. For normal systems, this option adds noticeable + overhead, so say N. + + With this option enabled the IA32 NMI watchdog will be disabled. + +Size of trace buffer +CONFIG_TRACE_SIZE + The number of trace entries to store in the kernel. + +Trace timestamp +CONFIG_TRACE_TIMESTAMP + Attempts to store an accurate timestamp against each trace entry, + scripts/ktrace will calculate the interval between successive + entries. On processors where an accurate timestamp is not available, + the jiffie counter is used instead. Jiffies are almost useless + because most procedure calls run in less than one jiffie but it is + better than nothing. Recommended if you want procedure times and your + cpu supports an accurate timestamp, however it adds 64 or 32 bits to + each trace entry. + +Truncated trace timestamp +CONFIG_TRACE_TRUNCTIME + If the full timestamp field is taking up too much room (64 bits per + entry on x86) and you are willing to risk wraparound of the + timestamp, say Y here. Only the last 32 bits of the timestamp will + be stored. Unless you are *really* short on storage, say N. 
+ +Process ID for trace +CONFIG_TRACE_PID + If you want to know which process a trace table entry is for, say Y + here. Recommended but adds sizeof(pid_t) to each trace table entry. + +Cpu ID for tracer +CONFIG_TRACE_CPU + If you want to know which cpu a trace table entry is for, say Y here. + Only effective on SMP systems. Recommended but it adds sizeof(int) + to each trace table entry. + +Emergency trace length +CONFIG_ETRACE_LENGTH + This option controls the number of trace table entries printed in the + event of a kernel Oops. The default value (30) is usually enough. + +GCC profiling support +CONFIG_PROFILE_GCC + This option improves the kernel profiling by using the gcc profiling feature. + With this option enabled the kernel will use gcc profiling, not once + each timer interrupt. This option enabled will add a lot of overhead to + the kernel. If you want to run this kernel for production and you want + profiling, it's recommended that you use normal profiling and that you + say N here. + +Print %eip to resolve symbols from locks +CONFIG_PRINT_EIP + This allows the kernel to print on the console the %eip address every + time a kernel function is called. This facilitates the resolution + of addresses after a complete machine lockup. A system with this + enabled should only be run in console mode, not X. The %eip addresses + are only displayed on virtual consoles (/dev/tty0...), and not + on serial consoles. This displays a column for each CPU and a one-up + counter for each CPU. If your system locks up while this feature is + enabled, for each CPU record the first column (the address) sorted + by the second column (the one-up counter). ksymoops or a manual + trace of Symbols.map may then be used to determine the lockup. + For normal systems, this option adds noticeable overhead, so say N. + Say Y here if a kernel hacker tell you to do that. 
+ +Get Free Pages poisoner +CONFIG_GFP_POISON + Enable this option to make memory corruption at the GFP layer a bit + more visible. + ISDN support CONFIG_ISDN ISDN ("Integrated Services Digital Networks", called RNIS in France) diff -urNp --exclude CVS --exclude BitKeeper x-ref/Documentation/debugging.txt x/Documentation/debugging.txt --- x-ref/Documentation/debugging.txt 1970-01-01 01:00:00.000000000 +0100 +++ x/Documentation/debugging.txt 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,80 @@ +Debugging the kernel for fun and profit. + +Assorted tools of varying usefulness exist to debug the kernel. By far +the best debugging tool is the human brain. As Linus has said :- + + ... + I'm afraid that I've seen too many people fix bugs + by looking at debugger output, and that almost + inevitably leads to fixing the symptoms rather than + the underlying problems. + ... + "Use the Source, Luke, use the Source. Be one with + the code.". Think of Luke Skywalker discarding the + automatic firing system when closing on the deathstar, + and firing the proton torpedo (or whatever) manually. + _Then_ do you have the right mindset for fixing kernel + bugs. + ... + +Having said that, sometimes reading the source is not enough. The +following tools exist in the IKD patch :- + + Debug kernel stack overflows + Detect software lockups + Kernel tracer (show logic flow through procedures) + + Written by Ingo Molnar . Currently + maintained by Mike Galbraith . + + Print-EIP on video ram + + Improved by Andrea Arcangeli. + + Kernel stack meter + Kernel real profiling + Semaphore deadlock detector + + Developed by Andrea Arcangeli. + + kdb + Written by Scott Lurndal (SGI) + Integration into IKD by Andrea Arcangeli (v1.0) and Keith Owens + (v1.1 with kallsyms). + + free_pages poisoner + Written by Andrea Arcangeli + + slab posioner made a config option + Done by Andrea Arcangeli + + CONFIG_KALLSYMS added, makes all non-stack kernel symbols available + to debuggers. 
Needs `kallsyms` from modutils >= 2.3.11. + Written by Keith Owens + + lockmeter + Written by John Hawkes (SGI) + +COMPILER NOTE: all the features that needs the profiling stuff + (like the kernel tracer) needs a recent compiler + (gcc-2.7.2.3 doesn't work anymore with them). + I think the problem is that old good gcc doesn't like + the init sections. The suggested compiler at 19991219 + is egcs-2.91.66. + +The original merge of debugging tools into a single patch set (IKD) +is been done by Keith Owens . +PGP 2.6 917/C817FEC9. +Fingerprint 2B 25 0A 31 02 AE CA F7 73 0C 28 69 4A 7B 65 27 +PGP 5/GPG +pub 1024D/27B464EA 1998-05-27 Keith Owens + Key fingerprint = A8AD 7F99 34E6 546C 8D00 5375 8B85 0737 27B4 64EA +uid Keith Owens +sub 2048g/44ABB66C 1998-05-27 + +Currently the IKD patch is maintained by Andrea Arcangeli +and Mike Galbraith and is dowloadable at: + + ftp://ftp.*.kernel.org/pub/linux/kernel/people/andrea/ikd/ + +Have fun with it. diff -urNp --exclude CVS --exclude BitKeeper x-ref/Documentation/ktrace.txt x/Documentation/ktrace.txt --- x-ref/Documentation/ktrace.txt 1970-01-01 01:00:00.000000000 +0100 +++ x/Documentation/ktrace.txt 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,88 @@ +ktrace - Trace logic flow through the kernel with time stamps. + + +******* Please read debugging.txt first. ******* + + +LIMITATION: nanosecond accuracy timings on x86 CPUs works only if the + CPU has the rtdsc instruction. If you have another x86 + CPU, undef the HAVE_RTDSC define in include/asm/profiler.h. + See the 'tsc' flag in the /proc/cpuinfo flags field if + unsure. + + Alpha CPU support is not yet tested. + Intel SMP is tested + + +INSTALLATION + +If you are reading this, you have probably already applied the patch to +your kernel, now set the options and rebuild. Under Kernel Hacking, +say Y to Kernel debugging support then Y to Enable kernel tracing. +Make dep clean, recompile, install the new kernel and modules, reboot. 
+ +Expect the new kernel to be somewhat slower than the unpatched kernel. +Check out /proc/trace, if it exists then you can go on to to the +user-space part: + +In /usr/src/linux, make debug. To get the current trace on a 166 MHz +CPU: + +scripts/ktrace --speed 166 --map /usr/src/linux/System.map > output.txt + +you should get something like this in output.txt: + +MHZ: 166. +read 4420 lines from System.map. +calibration done, estimated measurement latency: 0.34 microseconds. + +c01299ca put_unused_buffer_head + (0.90) +c011232b wake_up +<13/f0> (1.48) +c0129a26 get_more_buffer_heads + (0.61) +c012880f get_hash_table +<13/c0> (1.34) +c01296ca __brelse + (97.15) +c0129345 set_writetime + (0.11) +c0129398 refile_buffer +<10/334> (0.36) +[...] + +By default, all of the kernel except for init_task and the profiler +is traced. This can lead to a very busy trace file, full of +low level routines. To turn off tracing for a directory and all its +subdirectories, add the line + + override CFLAGS := $(CFLAGS:%-pg=%-g -c) + +to the relevant Makefile, before Rules.make. Delete the *.o files you +want to recompile and make zImage/modules. + +ktrace can get an exclusive lock on /proc/trace before reading it. +This allows ktrace to be suspended until an event occurs. For example, + +* User written program gets exclusive lock on /proc/trace, waits for + event to occur. + +* After starting above program, user runs ktrace with -l or --lock + options which suspends on the lock. + +* User written program detects the desired event, releases the lock. + +* ktrace runs, the resulting trace is as close to the event as + scheduling will allow. + +Sometimes you cannot read /proc/trace directly, typically because the +system is dead and ktrace cannot be run. If it is still responding to +the Magic-SysRQ key (you did select that option didn't you?) then +SysRQ-g dumps syslog and /proc/trace to all consoles, the latter is in +hex. 
Capture the output via a serial console on another machine +(another useful debugging option). + +After your dead machine has been restarted, take the captured hex dump +of /proc/trace and feed it to ktrace with the option "-d filename" or +"--dump filename". The lock option is ignored when reading a dumped +ktrace. + +Have fun, mail mingo@pc5829.hil.siemens.at if problems. + +Updated by: Mike Galbraith mikeg@weiden.de + +map option, dump option and kernel integration by Keith Owens . diff -urNp --exclude CVS --exclude BitKeeper x-ref/Makefile x/Makefile --- x-ref/Makefile 2003-06-07 16:06:28.000000000 +0200 +++ x/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -35,7 +35,12 @@ CROSS_COMPILE = AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc +ifndef KERNEL_CC + CC =$(CROSS_COMPILE)gcc +else + CC =$(CROSS_COMPILE)$(KERNEL_CC) +endif + CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm @@ -45,13 +50,16 @@ OBJDUMP = $(CROSS_COMPILE)objdump MAKEFILES = $(TOPDIR)/.config GENKSYMS = /sbin/genksyms DEPMOD = /sbin/depmod +KALLSYMS = /sbin/kallsyms MODFLAGS = -DMODULE CFLAGS_KERNEL = PERL = perl +AWK = awk +TMPPREFIX = export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ - CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK all: do-it-all @@ -100,6 +108,13 @@ CFLAGS := $(CPPFLAGS) -Wall -Wstrict-pro -fno-strict-aliasing -fno-common ifndef CONFIG_FRAME_POINTER CFLAGS += -fomit-frame-pointer +else +ifeq ($(CONFIG_KERNEL_DEBUGGING), y) +CFLAGS += -fomit-frame-pointer +endif +endif +ifeq ($(CONFIG_DEBUG_MCOUNT),y) +CFLAGS += -pg endif ifeq ($(CONFIG_X86_REMOTE_DEBUG),y) ifeq ($(CONFIG_X86_64),y) @@ -143,6 +158,11 @@ NETWORKS =net/network.o LIBS =$(TOPDIR)/lib/lib.a SUBDIRS =kernel drivers mm fs net ipc lib +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + SUBDIRS += 
kernel/debug + CORE_FILES += kernel/debug/debug.o +endif + DRIVERS-n := DRIVERS-y := DRIVERS-m := @@ -215,7 +235,7 @@ DRIVERS := $(DRIVERS-y) CLEAN_FILES = \ kernel/ksyms.lst include/linux/compile.h \ vmlinux System.map \ - .tmp* \ + $(TMPPREFIX).tmp* \ drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ drivers/char/conmakehash \ drivers/char/drm/*-mod.c \ @@ -331,6 +351,11 @@ include/config/MARKER: scripts/split-inc scripts/split-include include/linux/autoconf.h include/config @ touch include/config/MARKER +debug: include/linux/version.h + $(MAKE) -C scripts ktrace + $(MAKE) -C scripts/memleak all + $(MAKE) -C scripts/lockstat all + linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) $(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER @@ -507,7 +532,7 @@ sums: dep-files: scripts/mkdep archdep include/linux/version.h rm -f .depend .hdepend - $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" + $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS) scripts) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS) scripts" ifdef CONFIG_MODVERSIONS $(MAKE) update-modverfile endif diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/alpha/config.in x/arch/alpha/config.in --- x-ref/arch/alpha/config.in 2003-06-07 16:06:15.000000000 +0200 +++ x/arch/alpha/config.in 2003-06-07 16:06:30.000000000 +0200 @@ -441,6 +441,8 @@ else define_tristate CONFIG_MATHEMU y fi +source kernel/debug/Config.in + endmenu source lib/Config.in diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/alpha/kernel/entry.S x/arch/alpha/kernel/entry.S --- x-ref/arch/alpha/kernel/entry.S 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/alpha/kernel/entry.S 2003-06-07 16:06:30.000000000 +0200 @@ -119,6 +119,17 @@ ldq $28,144($30); \ addq $30,184,$30 +/* + * Conditionally do profiling + */ +#ifdef CONFIG_TRACE +#define CALL_MCOUNT \ + lda $28,_mcount; \ + jsr $28,($28),_mcount +#else +#define CALL_MCOUNT +#endif + .text .set noat #if defined(__linux__) && 
!defined(__ELF__) @@ -141,6 +152,8 @@ entInt: .ent entMM entMM: SAVE_ALL + ldq $8,current_set + CALL_MCOUNT /* save $9 - $15 so the inline exception code can manipulate them. */ subq $30,56,$30 stq $9,0($30) @@ -391,6 +404,11 @@ undo_switch_stack: .ent entUna entUna: lda $30,-256($30) +#ifdef CONFIG_TRACE + stq $8,64($30) + ldq $8,current_set +#endif + CALL_MCOUNT stq $0,0($30) ldq $0,256($30) /* get PS */ stq $1,8($30) @@ -402,6 +420,10 @@ entUna: stq $5,40($30) stq $6,48($30) stq $7,56($30) +#ifndef CONFIG_TRACE + stq $8,64($30) + ldq $8,current_set +#endif stq $8,64($30) stq $9,72($30) stq $10,80($30) @@ -462,6 +484,9 @@ entUna: .ent entUnaUser entUnaUser: ldq $0,0($30) /* restore original $0 */ +#ifdef CONFIG_TRACE + ldq $8,64($30) +#endif lda $30,256($30) /* pop entUna's stack frame */ SAVE_ALL /* setup normal kernel stack */ lda $30,-56($30) @@ -588,6 +613,7 @@ ret_from_reschedule: beq $4,restore_all bne $5,signal_return restore_all: + CALL_MCOUNT RESTORE_ALL call_pal PAL_rti diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/alpha/lib/Makefile x/arch/alpha/lib/Makefile --- x-ref/arch/alpha/lib/Makefile 2003-03-15 03:24:53.000000000 +0100 +++ x/arch/alpha/lib/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -53,6 +53,10 @@ ifeq ($(CONFIG_SMP),y) OBJS += dec_and_lock.o endif +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + OBJS += _mcount.o +endif + lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/alpha/mm/fault.c x/arch/alpha/mm/fault.c --- x-ref/arch/alpha/mm/fault.c 2003-06-07 16:06:15.000000000 +0200 +++ x/arch/alpha/mm/fault.c 2003-06-07 16:06:30.000000000 +0200 @@ -189,6 +189,8 @@ no_context: return; } + /* recursion is the curse of the programming classes */ + SUSPEND_MCOUNT_PROC(current); /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/boot/Makefile x/arch/i386/boot/Makefile --- x-ref/arch/i386/boot/Makefile 2003-03-15 03:24:54.000000000 +0100 +++ x/arch/i386/boot/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -12,6 +12,8 @@ BOOT_INCL = $(TOPDIR)/include/linux/conf $(TOPDIR)/include/linux/autoconf.h \ $(TOPDIR)/include/asm/boot.h +override CFLAGS := $(CFLAGS:%-pg=% ) + zImage: $(CONFIGURE) bootsect setup compressed/vmlinux tools/build $(OBJCOPY) compressed/vmlinux compressed/vmlinux.out tools/build bootsect setup compressed/vmlinux.out $(ROOT_DEV) > zImage diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/config.in x/arch/i386/config.in --- x-ref/arch/i386/config.in 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/i386/config.in 2003-06-07 16:06:30.000000000 +0200 @@ -542,6 +542,13 @@ if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; th bool ' KGDB: Console messages through gdb' CONFIG_GDB_CONSOLE bool ' KGDB: Enable kernel asserts' CONFIG_KERNEL_ASSERTS fi + + source kernel/debug/Config.in + + # arch specific debugging options + if [ "$CONFIG_KERNEL_DEBUGGING" = "y" ]; then + bool ' Print %eip to resolve symbols from locks' CONFIG_PRINT_EIP n + fi fi endmenu diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/Makefile x/arch/i386/kernel/Makefile --- x-ref/arch/i386/kernel/Makefile 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/i386/kernel/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -64,4 +64,10 @@ gdbstart: gdbstart.c cleankernel: dummy -rm -f gdbstart +# Not safe to have tracing turned on in the init_task. That way lies deadlock. 
+ifeq ($(CONFIG_DEBUG_MCOUNT),y) +init_task.o: init_task.c $(TOPDIR)/include/linux/sched.h + $(CC) $(CFLAGS:%-pg=%-g -c) $(EXTRA_CFLAGS) -c -o $@ $< +endif + include $(TOPDIR)/Rules.make diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/entry.S x/arch/i386/kernel/entry.S --- x-ref/arch/i386/kernel/entry.S 2003-06-07 16:06:28.000000000 +0200 +++ x/arch/i386/kernel/entry.S 2003-06-07 16:06:30.000000000 +0200 @@ -203,6 +203,13 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_CURRENT(%ebx) +#ifdef CONFIG_DEBUG_MCOUNT + pushl %eax + pushl %ebx + call SYMBOL_NAME(mcount) + popl %ebx + popl %eax +#endif testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS jne tracesys cmpl $(NR_syscalls),%eax @@ -210,6 +217,11 @@ ENTRY(system_call) call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value ENTRY(ret_from_sys_call) +#ifdef CONFIG_DEBUG_MCOUNT + pushl %eax + call SYMBOL_NAME(mcount) + popl %eax +#endif cli # need_resched and signals atomic test cmpl $0,need_resched(%ebx) jne reschedule @@ -224,16 +236,35 @@ signal_return: testl $(VM_MASK),EFLAGS(%esp) movl %esp,%eax jne v86_signal_return +#ifndef CONFIG_KERNEL_DEBUGGING xorl %edx,%edx +#else + pushl $0 + pushl %eax +#endif call SYMBOL_NAME(do_signal) +#ifdef CONFIG_KERNEL_DEBUGGING + addl $8,%esp +#endif jmp restore_all ALIGN v86_signal_return: +#ifdef CONFIG_KERNEL_DEBUGGING + pushl %eax +#endif call SYMBOL_NAME(save_v86_state) movl %eax,%esp +#ifndef CONFIG_KERNEL_DEBUGGING xorl %edx,%edx +#else + pushl $0 + pushl %eax +#endif call SYMBOL_NAME(do_signal) +#ifdef CONFIG_KERNEL_DEBUGGING + addl $8,%esp +#endif jmp restore_all ALIGN diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/irq.c x/arch/i386/kernel/irq.c --- x-ref/arch/i386/kernel/irq.c 2003-06-07 16:06:19.000000000 +0200 +++ x/arch/i386/kernel/irq.c 2003-06-07 16:06:30.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -655,6 +656,8 @@ out: if 
(softirq_pending(cpu)) do_softirq(); + + MCOUNT(); return 1; } @@ -1013,6 +1016,8 @@ int setup_irq(unsigned int irq, struct i rand_initialize_irq(irq); } + MCOUNT(); + /* * The following block of code has to be executed atomically */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/process.c x/arch/i386/kernel/process.c --- x-ref/arch/i386/kernel/process.c 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/i386/kernel/process.c 2003-06-07 16:06:30.000000000 +0200 @@ -953,7 +953,7 @@ void dump_thread(struct pt_regs * regs, * More important, however, is the fact that this allows us much * more flexibility. */ -void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +void STDCALL(__switch_to(struct task_struct *prev_p, struct task_struct *next_p)) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/semaphore.c x/arch/i386/kernel/semaphore.c --- x-ref/arch/i386/kernel/semaphore.c 2003-06-07 16:06:25.000000000 +0200 +++ x/arch/i386/kernel/semaphore.c 2003-06-07 16:06:30.000000000 +0200 @@ -311,6 +311,10 @@ asm( ".align 4\n" ".globl __wtd_down_failed\n" "__wtd_down_failed:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif "pushl %eax\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" @@ -318,6 +322,10 @@ asm( "popl %ecx\n\t" "popl %edx\n\t" "popl %eax\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif "ret" ); diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/time.c x/arch/i386/kernel/time.c --- x-ref/arch/i386/kernel/time.c 2003-06-07 16:06:28.000000000 +0200 +++ x/arch/i386/kernel/time.c 2003-06-07 16:06:30.000000000 +0200 @@ -600,8 +600,10 @@ static inline void do_timer_interrupt(in * system, in that case we have to call the local interrupt handler. 
*/ #ifndef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_PROFILE_GCC if (!user_mode(regs)) x86_do_profile(regs->eip); +#endif #else if (!using_apic_timer) smp_local_timer_interrupt(regs); diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/traps.c x/arch/i386/kernel/traps.c --- x-ref/arch/i386/kernel/traps.c 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/i386/kernel/traps.c 2003-06-07 16:06:30.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include /* mcount debugger */ #ifdef CONFIG_MCA #include @@ -150,6 +151,53 @@ static inline int kernel_text_address(un #endif +#ifdef CONFIG_KERNEL_DEBUGGING +inline void print_call_trace_exact (struct pt_regs * regs, unsigned long esp) +{ + int i=1; + unsigned long *this_stack, *prev_stack, prev_addr, *prev_bp, framesize; + + printk("\nCall Trace: "); + + /* + * the stack layout: /----- *this_stack + * V + * [this_frame][prev_bp][prev_addr][prev_frame][...] + */ + + this_stack = (unsigned long *) regs->ebp; + framesize=0; + + while ((unsigned long) this_stack >= (esp & ~0x1fffUL) && + (unsigned long) (this_stack+1) < + (esp & ~0x1fffUL)+0x2000UL) + { + prev_addr = *(this_stack+1); + + if (!(i++ % 8)) + printk("\n "); + /* ksymoops expects [] */ + printk("[<%08lx>] (%lu) ", prev_addr, framesize); + + prev_bp = (unsigned long *)(*this_stack); + prev_stack = this_stack; + this_stack = prev_bp; + + if (i > 100) + { + printk("WARNING: something fishy with the stack frame?\n"); + printk("this_stack: [<%08lx>]\n", + (unsigned long)this_stack); + break; + } + framesize = (unsigned long)this_stack-(unsigned long)prev_stack; + } +#ifdef CONFIG_TRACE + print_emergency_trace(); +#endif +} +#endif /* CONFIG_KERNEL_DEBUGGING */ + void show_trace(unsigned long * stack) { int i; @@ -247,6 +295,12 @@ void show_registers(struct pt_regs *regs printk("\nStack: "); show_stack((unsigned long*)esp); +#ifdef CONFIG_KERNEL_DEBUGGING + /* + * If debugging is switched on then we can walk the stack frame. 
+ */ + print_call_trace_exact(regs, esp); +#endif module_oops_tracking_print(); printk("Code: "); @@ -372,6 +426,7 @@ static void inline do_trap(int trapnr, i if (ret) goto trap_signal; return; } + printk("\n"); } #define DO_ERROR(trapnr, signr, str, name) \ diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/mm/fault.c x/arch/i386/mm/fault.c --- x-ref/arch/i386/mm/fault.c 2003-06-07 16:06:27.000000000 +0200 +++ x/arch/i386/mm/fault.c 2003-06-07 16:06:30.000000000 +0200 @@ -9,6 +9,7 @@ * */ +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include /* For unblank_screen() */ #include @@ -339,6 +341,8 @@ no_context: spin_lock(&oops_lock); bust_spinlocks(1); + /* recursion is the curse of the programming classes */ + SUSPEND_MCOUNT_PROC(current); if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/ia64/config.in x/arch/ia64/config.in --- x-ref/arch/ia64/config.in 2003-06-07 16:06:15.000000000 +0200 +++ x/arch/ia64/config.in 2003-06-07 16:06:30.000000000 +0200 @@ -275,6 +275,12 @@ if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; th bool ' Disable VHPT' CONFIG_DISABLE_VHPT bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + source kernel/debug/Config.in + # arch specific debugging options + if [ "$CONFIG_KERNEL_DEBUGGING" = "y" ]; then + bool ' Print %eip to resolve symbols from locks' CONFIG_PRINT_EIP n + fi + bool ' Early printk support' CONFIG_IA64_EARLY_PRINTK if [ "$CONFIG_IA64_EARLY_PRINTK" != "n" ]; then bool ' Early printk on MMIO serial port' CONFIG_IA64_EARLY_PRINTK_UART diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/ia64/kernel/Makefile x/arch/ia64/kernel/Makefile --- x-ref/arch/ia64/kernel/Makefile 2003-03-15 03:24:54.000000000 +0100 +++ x/arch/ia64/kernel/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -9,6 +9,7 @@ all: kernel.o head.o init_task.o + O_TARGET := kernel.o export-objs := ia64_ksyms.o @@ 
-25,5 +26,12 @@ obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_IA64_MCA) += mca.o mca_asm.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o +obj-$(CONFIG_DEBUG_MCOUNT) += _mcount.o + +# Not safe to have tracing turned on in the init_task. That way lies deadlock. +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) +init_task.o: init_task.c $(TOPDIR)/include/linux/sched.h + $(CC) $(CFLAGS:%-pg=%-g -c) $(EXTRA_CFLAGS) -c -o $@ $< +endif include $(TOPDIR)/Rules.make diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/ia64/kernel/_mcount.S x/arch/ia64/kernel/_mcount.S --- x-ref/arch/ia64/kernel/_mcount.S 1970-01-01 01:00:00.000000000 +0100 +++ x/arch/ia64/kernel/_mcount.S 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,37 @@ + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global _mcount + .proc _mcount +_mcount: + alloc loc0 = ar.pfs, 4, 3, 3, 0 + mov loc1 = rp + mov loc2 = r8 // gcc uses r8 to pass pointer to return structure + ;; + mov out0 = in2 + mov out1 = rp + br.call.sptk.few rp = mcount + ;; +.here: +{ + .mii + mov gp = in1 + mov r2 = ip + mov ar.pfs = loc0 +} + ;; + adds r2 = 1f - .here, r2 + mov b7 = loc1 + mov rp = in2 + ;; + mov r8 = loc2 + mov b6 = r2 + br.ret.sptk.few b6 + +1: alloc r2 = ar.pfs, 0, 0, 9, 0 + mov ar.pfs = r40 + br b7 + .endp _mcount diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/sparc64/kernel/time.c x/arch/sparc64/kernel/time.c --- x-ref/arch/sparc64/kernel/time.c 2003-06-07 16:06:28.000000000 +0200 +++ x/arch/sparc64/kernel/time.c 2003-06-07 16:06:30.000000000 +0200 @@ -429,6 +429,7 @@ static __inline__ void timer_check_rtc(v void sparc64_do_profile(unsigned long pc, unsigned long o7) { +#ifndef CONFIG_PROFILE_GCC if (prof_buffer && current->pid) { extern int _stext; extern int rwlock_impl_begin, rwlock_impl_end; @@ -456,6 +457,7 @@ void sparc64_do_profile(unsigned long pc pc = prof_len - 1; atomic_inc((atomic_t *)&prof_buffer[pc]); } +#endif } static void timer_interrupt(int irq, void *dev_id, struct pt_regs * 
regs) diff -urNp --exclude CVS --exclude BitKeeper x-ref/drivers/char/sysrq.c x/drivers/char/sysrq.c --- x-ref/drivers/char/sysrq.c 2003-06-07 16:06:27.000000000 +0200 +++ x/drivers/char/sysrq.c 2003-06-07 16:06:30.000000000 +0200 @@ -10,6 +10,8 @@ * (c) 2000 Crutcher Dunnavant * overhauled to use key registration * based upon discusions in irc://irc.openprojects.net/#kernelnewbies + * Add dumploGs. Keith Owens 12/04/1998. + * Add Oops, changed Off to oFf. Keith Owens 26/04/1998. */ #include @@ -27,6 +29,7 @@ #include #include #include +#include #include @@ -35,6 +38,10 @@ extern void reset_vc(unsigned int); extern struct list_head super_blocks; +#ifdef CONFIG_TRACE +extern void ktrace_to_console(void); +#endif + /* Whether we react on sysrq keys or just ignore them */ int sysrq_enabled = 1; @@ -461,6 +468,8 @@ void __handle_sysrq_nolock(int key, stru if (!sysrq_enabled) return; + SUSPEND_MCOUNT; /* no point in tracing/profiling this */ + orig_log_level = console_loglevel; console_loglevel = 7; printk(KERN_INFO "SysRq : "); @@ -482,6 +491,7 @@ void __handle_sysrq_nolock(int key, stru printk ("\n"); console_loglevel = orig_log_level; } + RESUME_MCOUNT; } EXPORT_SYMBOL(handle_sysrq); diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/proc/generic.c x/fs/proc/generic.c --- x-ref/fs/proc/generic.c 2003-06-07 16:06:29.000000000 +0200 +++ x/fs/proc/generic.c 2003-06-07 16:06:30.000000000 +0200 @@ -106,6 +106,11 @@ proc_file_read(struct file * file, char * return the bytes, and set `start' to the desired offset * as an unsigned int. - Paul.Russell@rustcorp.com.au */ + /* Ensure that the data will fit when using the ppos hack, + * otherwise userland receives truncated data. + */ + if (n > count-1 && start && start < page) + break; n -= copy_to_user(buf, start < page ? 
page : start, n); if (n == 0) { if (retval == 0) diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/proc/proc_misc.c x/fs/proc/proc_misc.c --- x-ref/fs/proc/proc_misc.c 2003-06-07 16:06:25.000000000 +0200 +++ x/fs/proc/proc_misc.c 2003-06-07 16:06:30.000000000 +0200 @@ -659,6 +659,114 @@ static struct file_operations proc_sysrq }; #endif +#ifdef CONFIG_TRACE +#include +/* + * This function accesses kernel tracer information. The returned data is + * binary: the sampling step and the actual contents of the trace + * ringbuffer. Use of the program 'ktrace' is recommended in order to + * get meaningful info out of these data. + */ +static ssize_t read_trace(struct file *file, char *buf, size_t count, loff_t *ppos) +{ + loff_t p = *ppos, left; + unsigned long flags; + int i; + + SUSPEND_MCOUNT_TRACE; + LOCK_MCOUNT_TRACE(flags); + + /* Calibrate the tracer */ + for (i = 1; i <= TRACE_CALIBRATION_CALLS; ++i) + mcount_internal(-1); + + UNLOCK_MCOUNT_TRACE(flags); + + if (p >= sizeof(*trace_table)) + count = 0; + else if (count > sizeof(*trace_table) - p) + count = sizeof(*trace_table) - p; + + left = copy_to_user(buf, p + (char *)trace_table, count); + + RESUME_MCOUNT_TRACE; + + if (count && left == count) + return -EFAULT; + + *ppos += count - left; + return count - left; +} + +/* + * Writing to /proc/trace resets the counters. Doesnt make much sense + * as it's a ringbuffer, but we do it anyways, it might make sense for + * doing short term traces. + */ + +static ssize_t write_trace(struct file * file, const char * buf, size_t count, loff_t *ppos) +{ + unsigned long flags; + SUSPEND_MCOUNT_TRACE; + LOCK_MCOUNT_TRACE(flags); + memset(trace_table->entries, 0, sizeof(trace_table->entries)); + trace_table->curr_call = CONFIG_TRACE_SIZE-1; + UNLOCK_MCOUNT_TRACE(flags); + RESUME_MCOUNT_TRACE; + return count; +} + +/* + * Dump the kernel trace table in hex to all registered consoles. + * A method of getting the trace table when all else fails. 
+ * This is a raw dump, the entire table is printed in hex, 80 hex digits + * to a line. Capture the output via a serial console and feed into + * ktrace with the "-d filename" option. + * Not recommended for a large trace table over a slow serial line. + */ +#define TRACE_LINE_WIDTH 80 +void ktrace_to_console(void) +{ + static const char hexchar[] = "0123456789abcdef"; + int i; + unsigned c; + char buf[TRACE_LINE_WIDTH+3], *p; + + SUSPEND_MCOUNT_TRACE; + /* Should LOCK_MCOUNT_TRACE here but that might stop output. + * Live with the risk of dumping garbage. Cannot calibrate + * without the lock, OTOH accurate timing figures are probably + * the least of our worries at this point. + */ + + for (i = 0, p = buf; i < sizeof(*trace_table); ++i) { + /* hex convert inline, 200,000+ calls to vsprintf is slow */ + c = *((unsigned char *)(trace_table)+i); + *p++ = hexchar[c>>4]; + *p++ = hexchar[c&0xf]; + if (p - buf >= TRACE_LINE_WIDTH) { + *p++ = '\n'; + *p++ = '\0'; + console_print(buf); + p = buf; + } + } + if (p != buf) { + *p++ = '\n'; + *p++ = '\0'; + console_print(buf); + } + RESUME_MCOUNT_TRACE; +} + +static struct file_operations proc_trace_operations = { + read: read_trace, + write: write_trace, +}; + +struct proc_dir_entry *proc_root_trace; +#endif /* CONFIG_TRACE */ + struct proc_dir_entry *proc_root_kcore; static void create_seq_entry(char *name, mode_t mode, struct file_operations *f) diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-alpha/profiler.h x/include/asm-alpha/profiler.h --- x-ref/include/asm-alpha/profiler.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-alpha/profiler.h 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,51 @@ +#ifndef _LINUX_PROFILER_ASM_H +#define _LINUX_PROFILER_ASM_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +/* + * You've got to define two macros if you port the profiling stuff: + */ + +/* + * [kernel stack overflow profiling] + * + * this says how much kernel stack space is >left<. 
If this goes + * below a certain treshold then we generate an artificial oops. + * + * we do not assume anything about stack growth direction + */ + +#define get_stack_left() \ +({ \ + register unsigned long sp; \ + asm("bis $30,$30,%0" : "=r" (sp)); \ + sp & ~(PAGE_MASK << 1) - sizeof(struct task_struct); \ +}) + +/* + * [kernel tracer] + * + * this macro gets fast an accurate time and puts it into a 'u32' + * variable. It's used as a tracer timestamp. + */ + +#ifdef CONFIG_TRACE_TIMESTAMP +#define get_profiler_timestamp() \ + ( { \ + register u32 __res; \ + asm volatile ("rpcc %0" : "=r" (__res)); \ + __res; \ + } ) + +/* Always u32, even when CONFIG_TRACE_TRUNCTIME */ +typedef u32 profiler_timestamp_t; +#endif /* CONFIG_TRACE_TIMESTAMP */ + +typedef unsigned long profiler_pc_t; + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_ASM_H */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/profiler.h x/include/asm-i386/profiler.h --- x-ref/include/asm-i386/profiler.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-i386/profiler.h 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,62 @@ +#ifndef _LINUX_PROFILER_ASM_H +#define _LINUX_PROFILER_ASM_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +/* + * You've got to define two macros if you port the profiling stuff: + */ + +/* + * [kernel stack overflow profiling] + * + * this says how much kernel stack space is >left<. If this goes + * below a certain treshold then we generate an artificial oops. + * + * we do not assume anything about stack growth direction + */ + +#define get_stack_left() \ +({ \ + register unsigned long __res; \ + __asm__("movl %%esp, %0" : "=r" (__res)); \ + (__res & ~(PAGE_MASK << 1)) - sizeof(struct task_struct); \ +}) + +/* + * [kernel tracer] + * + * this macro gets fast an accurate time and puts it into a 'long long' + * variable. It's used as a tracer timestamp. 
+ */ + +#ifdef CONFIG_TRACE_TIMESTAMP +#define get_profiler_timestamp() \ + ( { \ + register u64 __res; \ + if (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability)) { \ + __asm__ __volatile__( \ + "rdtsc" : "=A"(__res) \ + ); \ + } \ + else { \ + /* no rdtsc, use jiffies instead */ \ + __res = jiffies; \ + } \ + __res; \ + } ) + +#ifdef CONFIG_TRACE_TRUNCTIME +typedef u32 profiler_timestamp_t; +#else +typedef u64 profiler_timestamp_t; +#endif /* CONFIG_TRACE_TRUNCTIME */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + +typedef unsigned long profiler_pc_t; + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_ASM_H */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/system.h x/include/asm-i386/system.h --- x-ref/include/asm-i386/system.h 2003-06-07 16:06:25.000000000 +0200 +++ x/include/asm-i386/system.h 2003-06-07 16:06:30.000000000 +0200 @@ -10,8 +10,15 @@ #ifdef __KERNEL__ struct task_struct; /* one of the stranger aspects of C forward declarations.. */ + +#include +#ifndef CONFIG_KERNEL_DEBUGGING /* Fix the FASTCALL thing -Andrea */ extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); +#else +extern void STDCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); +#endif +#ifdef CONFIG_KERNEL_DEBUGGING /* we can' t use FASTCALL -Andrea */ #define switch_to(prev,next,last) do { \ asm volatile("pushl %%esi\n\t" \ "pushl %%edi\n\t" \ @@ -19,6 +26,8 @@ extern void FASTCALL(__switch_to(struct "movl %%esp,%0\n\t" /* save ESP */ \ "movl %2,%%esp\n\t" /* restore ESP */ \ "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %5\n\t" /* pass args throught the stack */ \ + "pushl %4\n\t" /* pass args throught the stack */ \ "pushl %3\n\t" /* restore EIP */ \ "jmp __switch_to\n" \ "1:\t" \ @@ -29,6 +38,25 @@ extern void FASTCALL(__switch_to(struct :"m" (next->thread.esp),"m" (next->thread.eip), \ "a" (prev), "d" (next)); \ } while (0) +#else /* original */ +#define switch_to(prev,next,last) do { \ + asm 
volatile("pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %2,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %3\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \ + :"m" (next->thread.esp),"m" (next->thread.eip), \ + "a" (prev), "d" (next)); \ +} while (0) +#endif #define _set_base(addr,base) do { unsigned long __pr; \ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-ia64/profiler.h x/include/asm-ia64/profiler.h --- x-ref/include/asm-ia64/profiler.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-ia64/profiler.h 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,12 @@ +#ifndef _LINUX_PROFILER_ASM_H +#define _LINUX_PROFILER_ASM_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +typedef unsigned long profiler_pc_t; + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_ASM_H */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/kernel.h x/include/linux/kernel.h --- x-ref/include/linux/kernel.h 2003-06-07 16:06:24.000000000 +0200 +++ x/include/linux/kernel.h 2003-06-07 16:06:30.000000000 +0200 @@ -8,6 +8,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -26,7 +27,11 @@ #define LONG_MIN (-LONG_MAX - 1) #define ULONG_MAX (~0UL) -#define STACK_MAGIC 0xdeadbeef +#if BITS_PER_LONG < 64 +# define STACK_MAGIC 0xdeadbeef +#else +# define STACK_MAGIC 0xfeedbabedeadbeef +#endif #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -50,10 +55,12 @@ extern int console_printk[]; # define ATTRIB_NORET __attribute__((noreturn)) # define NORET_AND noreturn, -#if defined(__i386__) || defined(UM_FASTCALL) +#if (defined(__i386__) || defined(UM_FASTCALL)) && !defined(CONFIG_KERNEL_DEBUGGING) #define FASTCALL(x) x __attribute__((regparm(3))) +#define STDCALL(x) x #else #define 
FASTCALL(x) x +#define STDCALL(x) __attribute__((stdcall)) x #endif struct completion; diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/profiler.h x/include/linux/profiler.h --- x-ref/include/linux/profiler.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/linux/profiler.h 2003-06-07 16:09:19.000000000 +0200 @@ -0,0 +1,90 @@ +#ifndef _LINUX_PROFILER_H +#define _LINUX_PROFILER_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +#include +#include +#include +#include + +#ifdef __ia64__ +extern void mcount(profiler_pc_t, profiler_pc_t); +#else +extern void mcount (void); +#endif +extern int mcount_internal(profiler_pc_t self_addr); +extern atomic_t mcount_ready; /* controls all mcount() processing */ + +#define SUSPEND_MCOUNT atomic_dec(&mcount_ready) +#define RESUME_MCOUNT atomic_inc(&mcount_ready) +#define SUSPEND_MCOUNT_PROC(x) ((x)->flags |= PF_NO_MCOUNT) +#define RESUME_MCOUNT_PROC(x) ((x)->flags &= ~PF_NO_MCOUNT) +#define MCOUNT() mcount() + +#ifdef CONFIG_TRACE + +extern atomic_t mcount_trace_ready; /* controls just mcount() tracing */ +/* + * Protect the profiling table with a spin lock, only one cpu at a + * time. No point in read/write locks, almost all accesses are for + * write. Since this code is accessed from all contexts, use + * spin_lock_irqsave. + */ +extern spinlock_t trace_table_lock; + +/* Note: The hierarchy is mcount_ready, mcount_trace_ready, trace_table_lock */ + +struct trace_entry { + profiler_pc_t pc; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t timestamp; +#endif +#ifdef CONFIG_TRACE_PID + pid_t pid; +#endif +#if defined(CONFIG_TRACE_CPU) && defined(CONFIG_SMP) + unsigned int cpu; +#endif +}; + +extern struct trace_table { + unsigned int table_size; + unsigned int curr_call; + struct trace_entry entries[CONFIG_TRACE_SIZE]; +} *trace_table; + +/* + * die_if_kernel() uses this to 'extend' the stack trace given in an Oops + * message. You can use this when debugging special code, as a debugging aid. 
+ */ +void print_emergency_trace (void); + +#define TRACE_CALIBRATION_CALLS 20 + +#define SUSPEND_MCOUNT_TRACE atomic_dec(&mcount_trace_ready) +#define RESUME_MCOUNT_TRACE atomic_inc(&mcount_trace_ready) +#define LOCK_MCOUNT_TRACE(x) spin_lock_irqsave(&trace_table_lock, x); +#define UNLOCK_MCOUNT_TRACE(x) spin_unlock_irqrestore(&trace_table_lock, x); + +#else /* !CONFIG_TRACE */ + +#define SUSPEND_MCOUNT_TRACE +#define RESUME_MCOUNT_TRACE +#define LOCK_MCOUNT_TRACE(x) +#define UNLOCK_MCOUNT_TRACE(x) + +#endif /* CONFIG_TRACE */ +#else /* !CONFIG_DEBUG_MCOUNT */ + +#define SUSPEND_MCOUNT +#define RESUME_MCOUNT +#define SUSPEND_MCOUNT_PROC(x) +#define RESUME_MCOUNT_PROC(x) +#define MCOUNT() + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_H */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/reboot.h x/include/linux/reboot.h --- x-ref/include/linux/reboot.h 2003-06-01 19:39:46.000000000 +0200 +++ x/include/linux/reboot.h 2003-06-07 16:06:30.000000000 +0200 @@ -20,6 +20,9 @@ * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task. * POWER_OFF Stop OS and remove all power from system, if possible. * RESTART2 Restart system using given command string. + * OOPS Cause a kernel Oops, the machine should continue afterwards. + * STACKFAULT Overflow the kernel stack with recursion. + * KERNEL_LOOP Endless kernel loop, unlocked. 
*/ #define LINUX_REBOOT_CMD_RESTART 0x01234567 @@ -29,6 +32,9 @@ #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 +#define LINUX_REBOOT_CMD_OOPS 0x4F6F7001 +#define LINUX_REBOOT_CMD_STACKFAULT 0x53746602 +#define LINUX_REBOOT_CMD_KERNEL_LOOP 0x4C6F7003 #ifdef __KERNEL__ diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/sched.h x/include/linux/sched.h --- x-ref/include/linux/sched.h 2003-06-07 16:06:27.000000000 +0200 +++ x/include/linux/sched.h 2003-06-07 16:06:30.000000000 +0200 @@ -444,6 +444,9 @@ struct task_struct { /* TASK_UNMAPPED_BASE value */ unsigned long map_base; +#ifdef CONFIG_DEBUG_SOFTLOCKUP + unsigned int deadlock_count; +#endif }; /* @@ -460,6 +463,9 @@ struct task_struct { #define PF_FREE_PAGES (1UL<<8) /* per process page freeing */ #define PF_NOIO (1UL<<9) /* avoid generating further I/O */ #define PF_FSTRANS (1UL<<10) /* inside a filesystem transaction */ +#ifdef CONFIG_DEBUG_MCOUNT +#define PF_NO_MCOUNT (1UL<<11) /* skip mcount() processing */ +#endif /* * Ptrace flags diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/sysctl.h x/include/linux/sysctl.h --- x-ref/include/linux/sysctl.h 2003-06-07 16:06:27.000000000 +0200 +++ x/include/linux/sysctl.h 2003-06-07 16:06:30.000000000 +0200 @@ -640,6 +640,10 @@ enum { }; /* CTL_DEBUG names: */ +enum { + DEBUG_KSTACK_METER = 1, + DEBUG_DISABLE_MCOUNT = 2, +}; /* CTL_DEV names: */ enum { diff -urNp --exclude CVS --exclude BitKeeper x-ref/init/main.c x/init/main.c --- x-ref/init/main.c 2003-06-07 16:06:27.000000000 +0200 +++ x/init/main.c 2003-06-07 16:06:30.000000000 +0200 @@ -112,6 +112,14 @@ extern void ecard_init(void); extern void ipc_init(void); #endif +#ifdef __sparc__ +extern int serial_console; +#endif + +#ifdef CONFIG_DEBUG_MCOUNT +extern void mcount_init(void); +#endif + /* * Boot command-line arguments */ @@ -459,6 +467,32 @@ static void __init do_initcalls(void) flush_scheduled_tasks(); } +#if defined(CONFIG_SMP) && 
defined(CONFIG_KERNEL_DEBUGGING) +void show_one (int i) +{ + static int curr=0x12345678; + + curr++; + *(((volatile int *)0x000b8000)+i)=curr; + *(((volatile int *)0x000b8100)+i)=curr; + *(((volatile int *)0x000b8200)+i)=curr; + *(((volatile int *)0x000b8300)+i)=curr; +} + +void show_us(void) +{ + for (;;) { + __cli(); + show_one(0); + show_one(10); + show_one(20); + show_one(30); + show_one(40); + show_one(50); + } +} +#endif + /* * Ok, the machine is now initialized. None of the devices * have been touched yet, but the CPU subsystem is up and diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/debug/Config.in x/kernel/debug/Config.in --- x-ref/kernel/debug/Config.in 1970-01-01 01:00:00.000000000 +0100 +++ x/kernel/debug/Config.in 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,44 @@ +# +# Common kernel debugging configuration. arch specific debugging facilities +# are in arch/xxx/config.in. +# + bool 'Kernel debugging support' CONFIG_KERNEL_DEBUGGING n + if [ "$CONFIG_KERNEL_DEBUGGING" = "y" ]; then + comment 'note: enabling either kernel tracer or stack meter will' + comment ' of necessity disable the nmi-watchdog.' 
+ if [ "$CONFIG_NOHIGHMEM" = "y" ]; then + bool ' GFP poison' CONFIG_GFP_POISON n + fi + bool ' Debug kernel stack overflows' CONFIG_DEBUG_KSTACK n + if [ "$CONFIG_DEBUG_KSTACK" = "y" ]; then + int ' Stack threshold' CONFIG_KSTACK_THRESHOLD 500 + fi + bool ' Kernel Stack Meter' CONFIG_KSTACK_METER n + bool ' Detect software lockups' CONFIG_DEBUG_SOFTLOCKUP n + if [ "$CONFIG_DEBUG_SOFTLOCKUP" = "y" ]; then + int ' Deadlock threshold' CONFIG_SOFTLOCKUP_THRESHOLD 100000000 0 2147483647 + fi + bool ' GCC profiling support' CONFIG_PROFILE_GCC n + bool ' Enable kernel tracer' CONFIG_TRACE n + if [ "$CONFIG_TRACE" = "y" ]; then + int ' Trace ringbuffer size' CONFIG_TRACE_SIZE 16384 + int ' Emergency trace length' CONFIG_ETRACE_LENGTH 30 + bool ' Trace timestamps' CONFIG_TRACE_TIMESTAMP n + if [ "$CONFIG_TRACE_TIMESTAMP" = "y" ]; then + bool ' Truncate timestamp' CONFIG_TRACE_TRUNCTIME n + fi + bool ' Process ID' CONFIG_TRACE_PID n + bool ' Cpu ID' CONFIG_TRACE_CPU n + fi + # CONFIG_DEBUG_MCOUNT is "y" iff an option requires calls to mcount(). + if [ "$CONFIG_DEBUG_KSTACK" = "y" -o \ + "$CONFIG_DEBUG_SOFTLOCKUP" = "y" -o \ + "$CONFIG_KSTACK_METER" = "y" -o \ + "$CONFIG_TRACE" = "y" -o \ + "$CONFIG_PRINT_EIP" = "y" -o \ + "$CONFIG_PROFILE_GCC" = "y" ]; then + define_bool CONFIG_DEBUG_MCOUNT y + else + define_bool CONFIG_DEBUG_MCOUNT n + fi + fi diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/debug/Makefile x/kernel/debug/Makefile --- x-ref/kernel/debug/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ x/kernel/debug/Makefile 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,17 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +O_TARGET := debug.o +obj-y = profiler.o +export-objs = profiler.o + +# Must turn off profiling for the profiler. +override CFLAGS := $(CFLAGS:%-pg=%) + +include $(TOPDIR)/Rules.make diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/debug/profiler.c x/kernel/debug/profiler.c --- x-ref/kernel/debug/profiler.c 1970-01-01 01:00:00.000000000 +0100 +++ x/kernel/debug/profiler.c 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,421 @@ +/* + * linux/kernel/profiler.c + * + * Copyright (C) 1997 Ingo Molnar, Richard Henderson + * Copyright (C) 1998 Andrea Arcangeli + * + * This source is covered by the GNU GPL, the same as all kernel sources. + */ + +/* + * 'profiler.c' implements various profiling hacks, by abusing the profiling + * hook 'mcount', generated by GCC -pg + * + * Currently used for: + * + * - monitoring kernel stack usage and generating oopses when stack overflow + * - detecting software lockups + * - tracing the kernel + * + * Has to be a separate C module, because we have to compile it without -pg, + * to avoid recursion. + */ + +/* + * - print-eip is now a config option and it' s improved to give as the + * the execution order of the box and fixed some glitches. + * - developed CONFIG_PROFILE_GCC + * - developed CONFIG_KSTACK_METER + * - fixed get_stack_left() to handle the 8k 2.1.x kernel stack size. + * -arca + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Generally we dislike #ifdef's in main modules, but these mcount() based + * features are is too performance-sensitive to make them an all or nothing + * option, and too small to be put into header files. + */ + +#ifdef CONFIG_DEBUG_MCOUNT /* any mcount() functions activated? 
*/ + +#ifdef CONFIG_TRACE + +spinlock_t trace_table_lock = SPIN_LOCK_UNLOCKED; +struct trace_table *trace_table = NULL; + +#endif /* CONFIG_TRACE */ + +#ifdef CONFIG_KSTACK_METER +struct { + unsigned int min_left_stack; + profiler_pc_t stack_eater_eip; +} kstack_meter = {-1UL, 0,}; + +static spinlock_t stack_meter_lock = SPIN_LOCK_UNLOCKED; +#endif + +/* deal with too early calls to mcount() and recursion */ +atomic_t mcount_ready = ATOMIC_INIT(0); +int sysctl_disable_mcount = 1; +#ifdef CONFIG_TRACE +atomic_t mcount_trace_ready = ATOMIC_INIT(0); +#endif + +void mcount_init (void) +{ +#ifdef CONFIG_TRACE + if ((trace_table = (struct trace_table *) alloc_bootmem(sizeof(*trace_table))) == NULL) { + printk("mcount_init: cannot allocate trace_table, size %lu. No tracing possible.\n", (unsigned long) sizeof(*trace_table)); + } + else { + trace_table->table_size = CONFIG_TRACE_SIZE; + trace_table->curr_call = 0; + memset(trace_table->entries, 0, sizeof(trace_table->entries)); + spin_lock_init(&trace_table_lock); +#ifdef CONFIG_TRACE_TIMESTAMP +#ifdef __i386__ + if (!(test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))) + printk("mcount_init: cpu does not support rdtsc, timestamps are jiffies instead\n"); +#else + printk("mcount_init: not i386 cpu, timestamps are jiffies instead\n"); +#endif /* __i386__ */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + RESUME_MCOUNT_TRACE; /* start it */ + } +#endif /* CONFIG_TRACE */ + + printk("mcount_init\n"); + /* + * Ok, from now on it's for real: + */ + RESUME_MCOUNT; /* start it */ +} + +#ifdef CONFIG_TRACE + +/* Strictly speaking this routine should get the trace_table spin lock. + * However it is rarely used and may not be in a safe context to get the + * lock so we just dump the table and hope it does not change under us. + */ + +void print_emergency_trace (void) +{ + struct trace_entry *t; + int i, j; + + SUSPEND_MCOUNT_TRACE; + printk ("[] "); + +/* + * Well, 30 entries is pretty arbitrary, seems to be a reasonable value. 
+ */ + j = trace_table->curr_call-CONFIG_ETRACE_LENGTH; + for (i=0; ientries[j++]); + if(!(i % 4)) + printk("\n"); + /* ksymoops expects [] */ + printk ("[<%08lx>] ", t->pc); +#ifdef CONFIG_TRACE_PID + printk("(%d) ", t->pid); +#endif +#if defined(CONFIG_TRACE_CPU) && defined(CONFIG_SMP) + printk("(%d) ", t->cpu); +#endif + } + RESUME_MCOUNT_TRACE; +} +#endif /* CONFIG_TRACE */ + +/* + * this (64 bytes) is twice as big as cachelines, but we cannot + * guarantee cacheline alignment ... too bad. So we waste two + * cachelines in the bad case. + * + * cacheline alignment is absolutely vital in this case, as these + * variables are higher frequented than say .. "current", and they + * should stay local on the owner CPU under all circumstances. + */ +struct cacheline_t { unsigned int i; }; + +#ifdef CONFIG_PRINT_EIP +/* + * Use this as last resort, when nothing else helps. If a hard lockup + * happens then you can decode the last EIP from the binary coded + * form on the screen. + */ + +static __inline__ void print_eip(profiler_pc_t eip) +{ +#if defined(__i386__) || defined(__ia64__) +#define video ((short int *)(0xb8000 + PAGE_OFFSET)) +#endif +#define HISTORY 24 +#define ALIGN __cacheline_aligned + + int i, value; + unsigned int tmp; + + /* + * We split the codepath in a dumb way, to get speed and proper + * per-CPU execution. + */ +#ifdef CONFIG_SMP + if (!smp_processor_id()) + { +#endif + static struct cacheline_t curr_pos_0 ALIGN ={0,}; + static unsigned int count_0 = 0; + /* + * we cover 1M of code currently ... should be enuff + */ + if ((curr_pos_0.i += 80) == HISTORY*80) + curr_pos_0.i = 0; + + for (i=7; i>=0; i--) + { + /* + * mask off the hexa digits one by one. 
+ */ + value = eip & 0xf; + if (value<10) + *(video+i+curr_pos_0.i) = 0x5400 + (value+'0'); + else + *(video+i+curr_pos_0.i) = 0x5400 + (value-10+'a'); + eip >>= 4; + } + /* *(video+8+curr_pos_0.i) = 0x5400 + '=';*/ + tmp = count_0++; + for (i=3; i>=0; i--) + { + /* + * mask off the hexa digits one by one. + */ + value = tmp & 0xf; + if (value<10) + *(video+i+9+curr_pos_0.i) = 0x5400 + (value+'0'); + else + *(video+i+9+curr_pos_0.i) = 0x5400 + (value-10+'a'); + tmp >>= 4; + } +#ifdef CONFIG_SMP + } else { + static struct cacheline_t curr_pos_1 ALIGN ={0,}; + static unsigned int count_1 = 0; + /* + * we cover 1M of code currently ... should be enuff + */ + + if ((curr_pos_1.i += 80) == HISTORY*80) + curr_pos_1.i = 0; + + for (i=7; i>=0; i--) { + /* + * mask off the hexa digits one by one. + */ + value = eip & 0xf; + if (value<10) + *(video+40+i+curr_pos_1.i) = 0x6400 + (value+'0'); + else + *(video+40+i+curr_pos_1.i) = 0x6400 + (value-10+'a'); + eip >>= 4; + } + /* *(video+48+curr_pos_1.i) = 0x6400 + '=';*/ + tmp = count_1++; + for (i=3; i>=0; i--) { + /* + * mask off the hexa digits one by one. + */ + value = tmp & 0xf; + if (value<10) + *(video+i+49+curr_pos_1.i) = 0x6400 + (value+'0'); + else + *(video+i+49+curr_pos_1.i) = 0x6400 + (value-10+'a'); + tmp >>= 4; + } + } +#endif /* CONFIG_SMP */ + +#undef ALIGN +#undef HISTORY +#undef video +} + +#endif /* CONFIG_PRINT_EIP */ + +#ifdef CONFIG_PROFILE_GCC /* arca */ +static __inline__ void kernel_profiling(profiler_pc_t eip) +{ + extern char _stext; + extern unsigned int * prof_buffer; + + if (!prof_buffer) + return; + + eip -= (unsigned long) &_stext; + eip >>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (eip > prof_len-1) + eip = prof_len-1; + + atomic_inc((atomic_t *)&prof_buffer[eip]); +} +#endif + +/* Watch this routine and mcount for any hidden calls to external + * routines. 
On SMP, something as simple as save_flags() calls + * __global_save_flags() in irq.c. If that module was compiled with + * -pg it calls back to mcount, stack overflow due to recursion. nm + * profiler.o should show no references to external procedures except + * for printk and vmalloc (from mcount_init). KAO. + */ + +inline int mcount_internal(profiler_pc_t self_addr) +{ +#ifdef CONFIG_PRINT_EIP + print_eip(self_addr); +#endif + +#ifdef CONFIG_PROFILE_GCC + kernel_profiling(self_addr); +#endif + +#ifdef CONFIG_DEBUG_SOFTLOCKUP + switch (current->deadlock_count) { + case 0: + if (current->pid) { + SUSPEND_MCOUNT; + printk("Deadlock threshold zero, should not happen, pid %d\n", current->pid); + RESUME_MCOUNT; + } + current->deadlock_count--; + return 0; + + case 1: + /* + * Oops on return. Do the oops outside this routine so + * mcount_ready and trace_table_lock are in a clean state. + */ + current->deadlock_count = 0; + /* no more mcount() processing for this process */ + SUSPEND_MCOUNT_PROC(current); + printk("Deadlock threshold exceeded, forcing Oops.\n"); + return 1; /* caller should oops */ + break; + + default: + current->deadlock_count--; + break; + } +#endif /* CONFIG_DEBUG_SOFTLOCKUP */ + +#ifdef CONFIG_DEBUG_KSTACK + if (get_stack_left() < CONFIG_KSTACK_THRESHOLD) { + SUSPEND_MCOUNT_PROC(current); + printk(KERN_ALERT "kernel stack overflow. Forcing Oops.\n"); + return 1; + } +#endif /* CONFIG_DEBUG_KSTACK */ + +#ifdef CONFIG_KSTACK_METER /* arca */ + { + unsigned int left_stack, flags; + + /* + * One CPU per time to be sure that min_left_stack is really + * the minimum. 
-arca + */ + spin_lock_irqsave(&stack_meter_lock, flags); + left_stack = get_stack_left() - sizeof(struct task_struct); + if (left_stack < kstack_meter.min_left_stack) + { + kstack_meter.min_left_stack = left_stack; + kstack_meter.stack_eater_eip = self_addr; + } + spin_unlock_irqrestore(&stack_meter_lock, flags); + } +#endif + +#ifdef CONFIG_TRACE + { + /* Protected by trace_table_lock */ + struct trace_entry *t; + ++(trace_table->curr_call); + while (trace_table->curr_call >= CONFIG_TRACE_SIZE) { + trace_table->curr_call -= CONFIG_TRACE_SIZE; + } + + t = &(trace_table->entries[trace_table->curr_call]); + + t->pc = self_addr; +#ifdef CONFIG_TRACE_TIMESTAMP + t->timestamp = get_profiler_timestamp(); +#endif +#ifdef CONFIG_TRACE_PID + t->pid = current->pid; +#endif +#if defined(CONFIG_TRACE_CPU) && defined(CONFIG_SMP) + t->cpu = smp_processor_id(); +#endif + } +#endif /* CONFIG_TRACE */ + return 0; +} + +#ifdef __ia64__ +void mcount(profiler_pc_t previous_eip, profiler_pc_t eip) +#else +void mcount(void) +#endif +{ + int do_oops; +#ifndef __ia64__ + profiler_pc_t eip; +#endif +#ifdef CONFIG_TRACE + unsigned long flags; +#endif + if (sysctl_disable_mcount || atomic_read(&mcount_ready) <= 0) + return; + +#ifdef CONFIG_TRACE + if (atomic_read(&mcount_trace_ready) <= 0) + return; +#endif + + if (current->flags & PF_NO_MCOUNT) + return; + + +#ifndef __ia64__ + eip = (profiler_pc_t) __builtin_return_address(0); +#endif + + LOCK_MCOUNT_TRACE(flags); + do_oops = mcount_internal(eip); + UNLOCK_MCOUNT_TRACE(flags); + + /* Do oops with mcount_ready and trace_table_lock in a clean state */ + if (do_oops) + *(char *)0=0; +} + +#ifdef CONFIG_MODULES +EXPORT_SYMBOL_NOVERS(mcount); +#endif + +#endif /* CONFIG_DEBUG_MCOUNT */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/fork.c x/kernel/fork.c --- x-ref/kernel/fork.c 2003-06-07 16:06:25.000000000 +0200 +++ x/kernel/fork.c 2003-06-07 16:06:30.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include 
#include @@ -731,7 +732,12 @@ int do_fork(unsigned long clone_flags, u p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; +#ifdef CONFIG_DEBUG_SOFTLOCKUP + p->deadlock_count=CONFIG_SOFTLOCKUP_THRESHOLD; +#endif + copy_flags(clone_flags, p); + RESUME_MCOUNT_PROC(p); down(&getpid_mutex); p->pid = get_pid(clone_flags); if (p->pid < 0) /* valid pids are >= 0 */ diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/panic.c x/kernel/panic.c --- x-ref/kernel/panic.c 2003-06-07 16:06:27.000000000 +0200 +++ x/kernel/panic.c 2003-06-07 16:06:30.000000000 +0200 @@ -16,6 +16,9 @@ #include #include #include +#ifdef CONFIG_TRACE +#include +#endif #if defined(__i386__) && defined(CONFIG_KMSGDUMP) extern void machine_dump(int); @@ -57,6 +60,9 @@ NORET_TYPE void panic(const char * fmt, machine_paniced = 1; bust_spinlocks(1); +#ifdef CONFIG_TRACE + SUSPEND_MCOUNT_TRACE; +#endif va_start(args, fmt); vsprintf(buf, fmt, args); va_end(args); diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sched.c x/kernel/sched.c --- x-ref/kernel/sched.c 2003-06-07 16:06:27.000000000 +0200 +++ x/kernel/sched.c 2003-06-07 16:06:30.000000000 +0200 @@ -946,6 +946,9 @@ switch_tasks: rq->quiescent++; clear_tsk_need_resched(prev); +#ifdef CONFIG_DEBUG_SOFTLOCKUP + prev->deadlock_count=CONFIG_SOFTLOCKUP_THRESHOLD; +#endif if (likely(prev != next)) { rq->nr_switches++; rq->curr = next; diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/softirq.c x/kernel/softirq.c --- x-ref/kernel/softirq.c 2003-06-07 16:06:15.000000000 +0200 +++ x/kernel/softirq.c 2003-06-07 16:06:30.000000000 +0200 @@ -16,6 +16,7 @@ #include #include #include +#include /* - No shared variables, all the data are CPU local. 
@@ -329,12 +330,18 @@ static void bh_action(unsigned long nr) hardirq_endlock(cpu); spin_unlock(&global_bh_lock); +#if defined(CONFIG_DEBUG_SOFTLOCKUP) && defined(CONFIG_SMP) + mcount(); +#endif return; resched_unlock: spin_unlock(&global_bh_lock); resched: mark_bh(nr); +#if defined(CONFIG_DEBUG_SOFTLOCKUP) && defined(CONFIG_SMP) + mcount(); +#endif } void init_bh(int nr, void (*routine)(void)) diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sys.c x/kernel/sys.c --- x-ref/kernel/sys.c 2003-06-07 16:06:15.000000000 +0200 +++ x/kernel/sys.c 2003-06-07 16:06:30.000000000 +0200 @@ -277,6 +277,37 @@ asmlinkage long sys_getpriority(int whic return retval; } +/* routines to trip various softlockup conditions, driven from reboot */ +static void kstack_test1 (void); +static void kstack_test2 (void); +static void kstack_test3 (void); +static void kstack_test4 (void); + +static void kstack_test1 (void) +{ + kstack_test2(); +} + +static void kstack_test2 (void) +{ + kstack_test3(); +} + +static void kstack_test3 (void) +{ + kstack_test4(); +} + +static void kstack_test4 (void) +{ + kstack_test1(); /* curse and recurse, stack overflow */ +} + +static volatile int softlockup_count=0; +void softlockup_looptest(void) +{ + softlockup_count++; +} /* * Reboot system call: for obvious reasons only root may call it, @@ -342,6 +373,34 @@ asmlinkage long sys_reboot(int magic1, i machine_restart(buffer); break; + case LINUX_REBOOT_CMD_OOPS: + /* Kernel oops, the machine should recover afterwards */ + *(char *)0=0; + break; + + /* Trip various software lockup conditions. Overloading sys_reboot + * because they do not justify their own syscall. These do not notify + * the reboot list. 
+ */ + + case LINUX_REBOOT_CMD_STACKFAULT: + /* stack fault via endless recursion */ +#ifndef CONFIG_DEBUG_KSTACK + printk(KERN_WARNING "Invoking STACKFAULT without CONFIG_DEBUG_KSTACK\n" + "Machine may not recover!\n"); +#endif + kstack_test1(); + break; + + case LINUX_REBOOT_CMD_KERNEL_LOOP: + /* lockup via endless loop */ +#ifndef CONFIG_DEBUG_SOFTLOCKUP + printk(KERN_WARNING "Invoking KERNEL_LOOP without CONFIG_DEBUG_SOFTLOCKUP\n" + "Machine may not recover!\n"); +#endif + for (;;) softlockup_looptest(); + break; + default: unlock_kernel(); return -EINVAL; diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sysctl.c x/kernel/sysctl.c --- x-ref/kernel/sysctl.c 2003-06-07 16:06:27.000000000 +0200 +++ x/kernel/sysctl.c 2003-06-07 16:06:30.000000000 +0200 @@ -354,7 +354,22 @@ static ctl_table fs_table[] = { {0} }; +#ifdef CONFIG_DEBUG_MCOUNT +extern int sysctl_disable_mcount; +#ifdef CONFIG_KSTACK_METER +extern int kstack_meter[]; +#endif +#endif + static ctl_table debug_table[] = { +#ifdef CONFIG_DEBUG_MCOUNT + {DEBUG_DISABLE_MCOUNT, "disable_mcount", &sysctl_disable_mcount, + sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_KSTACK_METER + {DEBUG_KSTACK_METER, "kstack_meter", &kstack_meter, 2*sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif +#endif {0} }; diff -urNp --exclude CVS --exclude BitKeeper x-ref/mm/page_alloc.c x/mm/page_alloc.c --- x-ref/mm/page_alloc.c 2003-06-07 16:06:25.000000000 +0200 +++ x/mm/page_alloc.c 2003-06-07 16:06:30.000000000 +0200 @@ -48,6 +48,18 @@ static int lower_zone_reserve_ratio[MAX_ int vm_gfp_debug = 0; +#ifdef CONFIG_GFP_POISON +static unsigned long poison(unsigned long addr, unsigned long order) +{ + memset((char *) addr, 0x6b, PAGE_SIZE<flags &= ~((1<flags & PF_FREE_PAGES)) goto local_freelist; back_local_freelist: diff -urNp --exclude CVS --exclude BitKeeper x-ref/scripts/Makefile x/scripts/Makefile --- x-ref/scripts/Makefile 2003-03-15 03:25:20.000000000 +0100 +++ x/scripts/Makefile 2003-06-07 
16:06:30.000000000 +0200 @@ -1,6 +1,20 @@ HEADER=header.tk TAIL=tail.tk +# +# include dependency files if they exist +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + +# +# Routines in this directory are external to the kernel but partake of the +# kernel namespace. Since they are external, they are not candidates for +# profiling. +# +override CFLAGS := $(CFLAGS:%-pg=%-g -c) + # Previous versions always remade kconfig.tk because they always depended # on soundscript. This runs fairly fast, and I can't find all the # Config.in files to depend on anyways. So I'll force it to remake. @@ -33,6 +47,9 @@ tkgen.o: tkgen.c tkparse.h tkparse.o tkcond.o tkgen.o: $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $(@:.o=.c) +ktrace: ktrace.o + $(CC) -o ktrace ktrace.o + docproc.o: docproc.c $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $(@:.o=.c) @@ -41,5 +58,6 @@ docproc: docproc.o clean: rm -f *~ kconfig.tk *.o tkparse mkdep split-include docproc + rm -f ktrace include $(TOPDIR)/Rules.make diff -urNp --exclude CVS --exclude BitKeeper x-ref/scripts/ktrace.c x/scripts/ktrace.c --- x-ref/scripts/ktrace.c 1970-01-01 01:00:00.000000000 +0100 +++ x/scripts/ktrace.c 2003-06-07 16:06:30.000000000 +0200 @@ -0,0 +1,481 @@ +/* ktrace.c + * + * Read /proc/trace and System.map (or equivalent) and print the trace entries. + * Prints the time taken between trace calls, "(????)" if the next entry for the + * current processor cannot be found. Prints the current pid, if the next entry + * for the current processor is for a different pid, prints "pid(old->new)". + * If compiled for SMP, the trace table contains the logical processor number, + * this is printed as "cpu(n)". + * + * The System.map can be the standard System.map for the kernel, in which case + * module traces will not resolve very well. It can be a merged System.map + * containing module entries as well, see make_System_map.pl for an example, + * ftp://ftp.ocs.com.au/pub/. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_TRACE +#include + +/* + * Dumbo arbitrary limits + */ + +#define LINE_LIMIT 100 +#define SYSMAP_LIMIT 30000 + +static struct trace_table * tt; + +struct sysmap_entry { + profiler_pc_t pc; + char * name; +}; + +static struct sysmap_entry sysmap_table [SYSMAP_LIMIT]; + +static int sysmap_entries = 0; +static int default_speed = 150, speed, lock = 0; +static char *default_map = "/usr/src/linux/System.map", *map, *dump; +static char *prog_name; + +static void usage(void) +{ + fprintf(stderr, "usage: %s\n", prog_name); + fprintf(stderr, "\t[--speed MHz] [-s MHz]\t\t\thow fast is your processor?\n"); + fprintf(stderr, "\t[--map System.map] [-m System.map]\twhere is your system map?\n"); + fprintf(stderr, "\t[--lock] [-l]\t\t\t\twait for the lock on /proc/trace\n"); + fprintf(stderr, "\t[--dump filename] [-d filename]\t\tread trace dump from filename\n"); + fprintf(stderr, "Default --speed is %d\n", default_speed); + fprintf(stderr, "Default --map is %s\n", default_map); + exit(1); +} + +static void read_sysmap (void) +{ + profiler_pc_t pc; + char type; + int i, len; + + FILE * file; + char str [LINE_LIMIT+1]; + + file=fopen(map, "r"); + + if (!file) { + fprintf(stderr,"System.map '%s' missing.\n", map); + usage(); + } + + sysmap_table[0].pc = 0; + sysmap_table[0].name = "
\n"; + + sysmap_entries=1; + + while (fscanf(file, "%lx %1c", &pc, &type) == 2) { + i=sysmap_entries++; + if (!fgets(str, LINE_LIMIT, file)) { + perror("ouch, System.map format error.\n"); + exit(-1); + } + sysmap_table[i].pc = pc; + sysmap_table[i].name = malloc(LINE_LIMIT); + if (!sysmap_table[i].name) { + perror("ouch, outta mem.\n"); + exit(-1); + } + /* + * Dirty trick to strip off end of line: + */ + len = strlen(str); + str[len-1]=0; + strcpy (sysmap_table[i].name, str); + } + + printf("read %d lines from System.map.\n", sysmap_entries-1); + + sysmap_table[sysmap_entries].pc = ~1; + sysmap_table[sysmap_entries].name = "
\n"; + sysmap_entries++; + + /* To be sure, to be sure :). */ + sysmap_table[sysmap_entries].pc = ~0; + sysmap_table[sysmap_entries++].name = ""; + sysmap_table[sysmap_entries].pc = ~0; + sysmap_table[sysmap_entries++].name = ""; + +/* + * for (i=0; i1) { + middle = first+(last-first)/2; + if (sysmap_table[middle].pc <= pc) + first = middle; + else + last = middle; + } + + return first; +} + +/* The trace table is a ring buffer. Convert 0 <= index < size to the + * corresponding entry, with wraparound as necessary. + */ +static inline int ring(int x) +{ + return ((x) % CONFIG_TRACE_SIZE); +} + +#if defined(CONFIG_TRACE_CPU) && defined(CONFIG_SMP) +#define CPU_PRESENT 1 +#else +#define CPU_PRESENT 0 +#endif + +static ssize_t read_dump(int fd, void *buf, size_t count) +{ + /* Find the start of the hex dump of /proc/trace, read + * and convert hex digits, storing in buf. Any garbage + * nibbles are silently ignored and treated as '0'. + */ + char line[BUFSIZ]; + int start = 0, value; + char *pline, c; + unsigned char *pbuf; + FILE *f = fdopen(fd, "r"); + if (!f) { + perror("read_dump fdopen failed"); + exit(-1); + } + pbuf = (unsigned char *) buf; + while (fgets(line, sizeof(line), f)) { + if (ferror(f)) { + perror("read_dump ferror detected"); + exit(-1); + } + if (strstr(line, "DAL: ktrace start")) { + start = 1; + continue; + } + if (start) { + if (strstr(line, "DAL: ktrace end")) + break; + pline = line; + while (*pline) { + while (*pline == '\r' || *pline == '\n') + ++pline; + if (!(c = *pline++)) + break; + value = 0; + if (c >= '0' && c <= '9') + value = c - '0'; + else if (c >= 'a' && c <= 'f') + value = c - 'a' + 10; + value <<= 4; + if (!(c = *pline++)) + break; + if (c >= '0' && c <= '9') + value += c - '0'; + else if (c >= 'a' && c <= 'f') + value += c - 'a' + 10; + if (count > 0) { + --count; + *(pbuf++) = (unsigned char) value; + } + if (count == 0) + break; + } + } + } + return(pbuf - (unsigned char *)buf); +} + +static void read_proc_info (void) +{ + 
int bytes, calibrate; + int i, j; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t min_latency; +#endif + struct trace_entry *tep1 = NULL, *tep2 = NULL; + + char *filename = "/proc/trace"; + int file; + + if (dump) + filename = dump; + + file=open(filename, O_RDONLY); + + if (!file) { + char message[BUFSIZ]; + sprintf(message, "%s missing\n", filename); + perror(message); + exit(-1); + } + if (lock && !dump && flock(file, LOCK_EX)) { + char message[BUFSIZ]; + sprintf(message, "Cannot get exclusive lock on %s\n", filename); + perror(message); + exit(-1); + } + + tt=(struct trace_table *)malloc(sizeof(*trace_table)); + + if (dump) { + printf("Reading dumped /proc/trace from %s ...", dump); + fflush(stdout); + bytes = read_dump(file, tt, sizeof(*trace_table)); + printf(" done\n"); + fflush(stdout); + } + else + bytes = read(file, tt, sizeof(*trace_table)); + + if (sizeof(*trace_table) != bytes) { + printf("something went wrong, bytes read: %d, tried: %d.\n", bytes, sizeof(*trace_table)); + exit(-1); + } + + if (lock && !dump && flock(file, LOCK_UN)) { + char message[BUFSIZ]; + sprintf(message, "Release lock on %s failed\n", filename); + perror(message); + } + + /* + * Pass 1: look for ~0 which signals calibration latencies. + * Since read_trace (fs/proc/array.c) locks the table and turns + * off mcount processing, the calibration entries should be the + * current entry and the previous TRACE_CALIBRATION_CALLS-1. + */ +#define FIRST_CALIBRATE (tt->curr_call-(TRACE_CALIBRATION_CALLS-1)) + +#ifdef CONFIG_TRACE_TIMESTAMP + min_latency = ~0; +#endif + calibrate = 0; + + if (!dump) { + /* look for read_trace in 200 entries before FIRST_CALIBRATE. + * 200 is arbitrary, normally read_trace is immediately before + * the first calibration but there is a small window between + * read_trace starting and tracing being suspended, other cpu's + * and/or interrupts can appear in that window. 
KAO + */ + for (j = 1; j <= 200; ++j) { + tep1 = &(tt->entries[ring(FIRST_CALIBRATE-j)]); + i = match_pc(tep1->pc); + if (!strcmp(sysmap_table[i].name," read_trace")) + break; + } + if (strcmp(sysmap_table[i].name," read_trace")) { + tep1 = &(tt->entries[ring(FIRST_CALIBRATE-1)]); + i = match_pc(tep1->pc); + fprintf(stderr, + "hmm, no 'read_trace', possibly wrong System.map?.\npc %lx proc %s\n", + tep1->pc, sysmap_table[i].name); + } + } + + for (i = FIRST_CALIBRATE; i < tt->curr_call; i++) { + tep1 = &(tt->entries[ring(i)]); + tep2 = &(tt->entries[ring(i+1)]); + if (tep1->pc == ~0 && tep2->pc == ~0) { +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t delta; + delta = tep2->timestamp - tep1->timestamp; + if (delta < min_latency) + min_latency=delta; +#endif /* CONFIG_TRACE_TIMESTAMP */ + ++calibrate; + } + } + + if (calibrate != TRACE_CALIBRATION_CALLS-1) { + fprintf(stderr,"huh, incorrect number of calibration entries found (%d)?.\n", calibrate); +#ifdef CONFIG_TRACE_TIMESTAMP + fprintf(stderr,"using 0.13 usecs.\n"); /*MIKEDIDIT was .39 (p5-150?)*/ + min_latency = 0.13*speed; + } else { + printf("calibration done, estimated measurement latency: %3.2f microseconds.\n", min_latency/(double)speed); + if (min_latency == 0) { + printf("Warning: latency is zero, does your cpu really support timestamps?\n"); + } + else + min_latency -= 10; +#endif /* CONFIG_TRACE_TIMESTAMP */ + } + printf("\n"); + + + /* Pass 2. 
*/ + + for (i = 1; i <= CONFIG_TRACE_SIZE; i++) { + unsigned int idx; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t delta = -1; +#endif /* CONFIG_TRACE_TIMESTAMP */ + + tep1 = &(tt->entries[ring(tt->curr_call+i)]); + if (tep1->pc == 0) + continue; /* trace table has been cleared */ +#ifdef CONFIG_TRACE_TIMESTAMP +#if CPU_PRESENT + for (j = 1; j <= CONFIG_TRACE_SIZE-i; ++j) { + tep2 = &(tt->entries[ring(tt->curr_call+i+j)]); + if (tep2->pc == 0) + break; + if (tep1->cpu == tep2->cpu) { + delta = tep2->timestamp - tep1->timestamp; + break; + } + } +#else /* CPU_PRESENT */ + tep2 = &(tt->entries[ring(tt->curr_call+i+1)]); + if (tep2->pc != 0 && i < CONFIG_TRACE_SIZE) + delta = tep2->timestamp - tep1->timestamp; +#endif /* CPU_PRESENT */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + + idx = match_pc(tep1->pc); + +#if 0 /* testing only */ +#ifdef CONFIG_TRACE_TIMESTAMP +#ifdef CONFIG_TRACE_TRUNCTIME + printf("%08x ", tep1->timestamp); +#else + printf("%08llx%08llx ", tep1->timestamp >> 32, + tep1->timestamp & 0xffffffff); +#endif +#endif /* CONFIG_TRACE_TIMESTAMP */ +#endif + printf("%08lx %s +<%lx/%lx>", + tep1->pc, + sysmap_table[idx].name, + tep1->pc-sysmap_table[idx].pc, + sysmap_table[idx+1].pc - sysmap_table[idx].pc); +#ifdef CONFIG_TRACE_TIMESTAMP + if (delta == -1) + printf(" (????)"); + else if (tep1->pc == ~0) + printf(" (%3.08f raw)", + (double)delta); + else + printf(" (%3.02f)", + (delta-min_latency)/(double)speed); +#endif /* CONFIG_TRACE_TIMESTAMP */ +#if CPU_PRESENT + printf(" cpu(%d)", tep1->cpu); +#endif +#ifdef CONFIG_TRACE_PID + if (tep1->pid == tep2->pid) + printf(" pid(%d)", tep1->pid); + else + printf(" pid(%d->%d)", tep1->pid, tep2->pid); +#endif /* CONFIG_TRACE_PID */ + printf("\n"); + } + + free(tt); + close(file); + + printf("\n"); +} + +int main(int argc, char * * argv) +{ + int c, option_index = 0; + char *endptr; + struct option long_options[] = { + {"speed", 1, 0, 's'}, + {"map", 1, 0, 'm'}, + {"lock", 0, 0, 'l'}, + {"dump", 1, 0, 'd'}, + 
{0, 0, 0, 0} + }; + + prog_name = argv[0]; + speed = default_speed; + map = default_map; + + while (1) { + c = getopt_long_only (argc, argv, "s:m:ld:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 's': + speed = strtol(optarg, &endptr, 0); + if (*endptr) { + fprintf(stderr, "speed is not numeric '%s'\n", + optarg); + usage(); + } + if (speed < 0 || speed > 1000) { + fprintf(stderr, "speed must be 1-1000\n"); + usage(); + } + break; + + case 'm': + map = optarg; + break; + + case 'l': + lock = !lock; + break; + + case 'd': + dump = optarg; + break; + + case '?': + usage(); + exit(-1); + + default: + printf ("?? getopt returned character code 0%o '%c' ??\n", c, c); + } + } + + if (optind < argc) { + fprintf (stderr, "Unknown parameter '%s'\n", argv[optind]); + usage(); + exit(-1); + } + + printf("Speed: %d. Map: %s\n", speed, map); + + read_sysmap(); + read_proc_info(); + return 0; +} + +#else +#warning ktrace does nothing unless CONFIG_TRACE is set +int main(void) { return 0; } +#endif /* CONFIG_TRACE */