diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/Makefile 320-kcg/Makefile --- 310-irqbal_fast/Makefile 2003-12-02 14:57:26.000000000 -0800 +++ 320-kcg/Makefile 2003-12-02 14:57:55.000000000 -0800 @@ -439,6 +439,10 @@ ifndef CONFIG_FRAME_POINTER CFLAGS += -fomit-frame-pointer endif +ifeq ($(CONFIG_MCOUNT),y) +CFLAGS += -pg +endif + ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/Kconfig 320-kcg/arch/i386/Kconfig --- 310-irqbal_fast/arch/i386/Kconfig 2003-12-02 14:57:27.000000000 -0800 +++ 320-kcg/arch/i386/Kconfig 2003-12-02 14:57:55.000000000 -0800 @@ -1544,6 +1544,14 @@ config X86_MPPARSE depends on X86_LOCAL_APIC && !X86_VISWS default y +config MCOUNT + bool "Generate function call graph" + depends on FRAME_POINTER + help + This option instruments the kernel to generate a deterministic + function call graph. Answering Y here will make your kernel run + ???% slower. + endmenu source "security/Kconfig" diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/boot/compressed/Makefile 320-kcg/arch/i386/boot/compressed/Makefile --- 310-irqbal_fast/arch/i386/boot/compressed/Makefile 2003-12-02 14:57:27.000000000 -0800 +++ 320-kcg/arch/i386/boot/compressed/Makefile 2003-12-02 14:57:55.000000000 -0800 @@ -10,6 +10,17 @@ EXTRA_AFLAGS := -traditional CFLAGS := $(CFLAGS_NOGCOV) LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 +ifeq ($(CONFIG_MCOUNT),y) +quiet_cmd_nopg = CC $@ + cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@ + +$(obj)/misc.o: alwayscc + $(call cmd,nopg) + +alwayscc: + $(Q)rm -f $(obj)/misc.o +endif + $(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c 320-kcg/arch/i386/kernel/i386_ksyms.c --- 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c 2003-10-14 15:50:11.000000000 -0700 +++ 320-kcg/arch/i386/kernel/i386_ksyms.c 2003-12-02 
14:57:55.000000000 -0800 @@ -186,6 +186,11 @@ extern void * memcpy(void *,const void * EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(memset); +#ifdef CONFIG_MCOUNT +extern void mcount(void); +EXPORT_SYMBOL_NOVERS(mcount); +#endif + #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(atomic_dec_and_lock); #endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/Makefile 320-kcg/arch/i386/lib/Makefile --- 310-irqbal_fast/arch/i386/lib/Makefile 2003-12-02 14:53:25.000000000 -0800 +++ 320-kcg/arch/i386/lib/Makefile 2003-12-02 14:57:55.000000000 -0800 @@ -11,3 +11,4 @@ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o lib-$(CONFIG_KGDB) += kgdb_serial.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o +lib-$(CONFIG_MCOUNT) += mcount.o diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/mcount.S 320-kcg/arch/i386/lib/mcount.S --- 310-irqbal_fast/arch/i386/lib/mcount.S 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/arch/i386/lib/mcount.S 2003-12-02 14:57:55.000000000 -0800 @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2000 SGI + * + * Written by Dimitris Michailidis dimitris@sgi.com + * + * This file implements mcount(), which is used to collect profiling data. + * We provide several variants to accomodate different types of callers at + * the lowest possible overhead. + */ + +#include +#include + +#define MCOUNT_HEAD \ + pushl %ecx /* We must protect the arguments of FASTCALLs */; \ + movl mcount_hook, %ecx; \ + testl %ecx, %ecx; \ + jz 1f; \ + pushl %eax; \ + pushl %edx; \ + movl 12(%esp), %edx /* mcount()'s parent */ + +#define MCOUNT_TAIL \ + call *%ecx; \ + popl %edx; \ + popl %eax; \ +1: popl %ecx + +/* + * This is the main variant and is called by C code. GCC's -pg option + * automatically instruments every C function with a call to this. 
+ */ +ENTRY(mcount) +#if defined(CONFIG_MCOUNT) + MCOUNT_HEAD +#ifdef CONFIG_FRAME_POINTER + movl 4(%ebp), %eax /* mcount()'s parent's parent */ +#endif + MCOUNT_TAIL +#endif + ret diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/Kconfig 320-kcg/arch/ppc64/Kconfig --- 310-irqbal_fast/arch/ppc64/Kconfig 2003-12-02 14:57:27.000000000 -0800 +++ 320-kcg/arch/ppc64/Kconfig 2003-12-02 14:57:55.000000000 -0800 @@ -408,6 +408,14 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config MCOUNT + bool "Generate function call graph" + depends on DEBUG_KERNEL + help + This option instruments the kernel to generate a deterministic + function call graph. Answering Y here will make your kernel run + 1-2% slower. + endmenu source "security/Kconfig" diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/kernel/Makefile 320-kcg/arch/ppc64/kernel/Makefile --- 310-irqbal_fast/arch/ppc64/kernel/Makefile 2003-10-01 11:40:44.000000000 -0700 +++ 320-kcg/arch/ppc64/kernel/Makefile 2003-12-02 14:57:55.000000000 -0800 @@ -5,6 +5,17 @@ EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds.s +ifeq ($(CONFIG_MCOUNT),y) +quiet_cmd_nopg = CC $@ + cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@ + +$(obj)/stab.o: alwayscc + $(call cmd,nopg) + +alwayscc: + $(Q)rm -f $(obj)/stab.o +endif + obj-y := setup.o entry.o traps.o irq.o idle.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o stab.o htab.o pacaData.o \ diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/lib/Makefile 320-kcg/arch/ppc64/lib/Makefile --- 310-irqbal_fast/arch/ppc64/lib/Makefile 2003-06-19 14:41:18.000000000 -0700 +++ 320-kcg/arch/ppc64/lib/Makefile 2003-12-02 14:57:55.000000000 -0800 @@ -4,3 +4,4 @@ lib-y := checksum.o dec_and_lock.o string.o strcase.o lib-y += copypage.o memcpy.o copyuser.o +lib-$(CONFIG_MCOUNT) += mcount.o diff -purN -X 
/home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/lib/mcount.S 320-kcg/arch/ppc64/lib/mcount.S --- 310-irqbal_fast/arch/ppc64/lib/mcount.S 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/arch/ppc64/lib/mcount.S 2003-12-02 14:57:55.000000000 -0800 @@ -0,0 +1,61 @@ +/* + * Written by Adam Litke (agl@us.ibm.com) + * + * This file implements mcount(), which is used to collect profiling data. + * + */ + +#include +#include +#include + +/* + * This is called by C code in all files compiled with -pg + */ + +_GLOBAL(_mcount) + /* Store parameter regs on stack */ + std r3, -16(r1) + std r4, -24(r1) + std r5, -32(r1) + std r6, -40(r1) + std r7, -48(r1) + std r8, -56(r1) + std r9, -64(r1) + std r10, -72(r1) + + /* Set up new stack frame */ + mflr r0 + std r0, 16(r1) + mfcr r0 + std r0, 8(r1) + stdu r1, -184(r1) + + /* If relocation is off skip mcount_entry */ + std r14, -8(r1) + mfmsr r14 + andi. r14, r14, MSR_IR + cmpldi r14, 0 + ld r14, -8(r1) + beq 1f + + /* Call mcount_entry */ + bl .mcount_entry + ori 0,0,0 + +1: + /* Put everything back */ + addi r1, r1, 184 + ld r0, 16(r1) + mtlr r0 + ld r0, 8(r1) + mtcr r0 + ld r3, -16(r1) + ld r4, -24(r1) + ld r5, -32(r1) + ld r6, -40(r1) + ld r7, -48(r1) + ld r8, -56(r1) + ld r9, -64(r1) + ld r10, -72(r1) + blr diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/fs/proc/proc_misc.c 320-kcg/fs/proc/proc_misc.c --- 310-irqbal_fast/fs/proc/proc_misc.c 2003-12-02 14:56:23.000000000 -0800 +++ 320-kcg/fs/proc/proc_misc.c 2003-12-02 14:57:55.000000000 -0800 @@ -51,6 +51,10 @@ #include #include +#ifdef CONFIG_MCOUNT +#include +#endif + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -854,4 +858,13 @@ void __init proc_misc_init(void) entry->proc_fops = &ppc_htab_operations; } #endif +#ifdef CONFIG_MCOUNT + { + extern struct file_operations mcount_operations; + extern struct proc_dir_entry *mcount_pde; + mcount_pde = create_proc_entry("mcount", S_IRUGO|S_IWUSR, NULL); + if 
(mcount_pde) + mcount_pde->proc_fops = &mcount_operations; + } +#endif } diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/include/asm-i386/atomic.h 320-kcg/include/asm-i386/atomic.h --- 310-irqbal_fast/include/asm-i386/atomic.h 2003-10-01 11:41:15.000000000 -0700 +++ 320-kcg/include/asm-i386/atomic.h 2003-12-02 14:57:56.000000000 -0800 @@ -58,6 +58,17 @@ static __inline__ void atomic_add(int i, :"ir" (i), "m" (v->counter)); } +/* Like the above but also returns the result */ +static __inline__ int atomic_add_return(int i, atomic_t *v) +{ + register int oldval; + __asm__ __volatile__( + LOCK "xaddl %2,%0" + :"=m" (v->counter), "=r" (oldval) + :"1" (i), "m" (v->counter) : "memory"); + return oldval + i; +} + /** * atomic_sub - subtract the atomic variable * @i: integer value to subtract diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/include/linux/mcount.h 320-kcg/include/linux/mcount.h --- 310-irqbal_fast/include/linux/mcount.h 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/include/linux/mcount.h 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,63 @@ +/* + * include/linux/mcount.h + * + * Implementation of kernel mcount handler and supporting functions. 
+ * + * Code based on kernprof http://oss.sgi.com/projects/kernprof/ + * Copyright (C) SGI 1999, 2000, 2001 + * Written by Dimitris Michailidis (dimitris@engr.sgi.com) + * Modified by John Hawkes (hawkes@engr.sgi.com) + * Contributions from Niels Christiansen (nchr@us.ibm.com) + * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com) + */ + +#ifndef __MCOUNT_H +#define __MCOUNT_H + +#include +#include +#include + +#define DFL_PC_RES 4 /* default PC resolution for this platform */ +#define CG_MAX_ARCS (1 << (8 * sizeof(short))) +#define FUNCTIONPC(func) (*(unsigned long *)&(func)) + +#define pc_out_of_range(pc) \ + ((pc) < (unsigned long) &_stext || (pc) >= (unsigned long) &_etext) + +struct prof_mem_map +{ + unsigned long kernel_buckets; /* number of kernel buckets */ + unsigned long nr_cpus; /* number of processors whether profiled or not */ + unsigned long cg_from_size; /* size of one cg_from array */ + unsigned long cg_to_size; /* size of one cg_to array */ + unsigned long cg_to_offset; /* offset of cg_to array */ + unsigned long kernel_start; /* lowest text address in kernel */ + unsigned long kernel_end; /* highest text address in kernel */ +}; + +struct cg_arc_dest { + unsigned long address; + atomic_t count; + unsigned short link; + unsigned short pad; +}; + +#ifdef CONFIG_X86 +void cg_record_arc(unsigned long frompc, unsigned long selfpc) __attribute__((regparm(2))); +#endif + +int mcount_init(void); + +ssize_t mcount_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); + +ssize_t mcount_read(struct file * file, char * buf, + size_t count, loff_t *ppos); + +static struct file_operations mcount_operations = { + write: mcount_write, + read: mcount_read, +}; + +#endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/Makefile 320-kcg/kernel/Makefile --- 310-irqbal_fast/kernel/Makefile 2003-12-02 14:57:27.000000000 -0800 +++ 320-kcg/kernel/Makefile 2003-12-02 14:57:56.000000000 -0800 @@ -27,6 +27,17 @@ 
obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKCONFIG_PROC) += configs.o obj-$(CONFIG_X86_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_MCOUNT) += mcount.o + +ifeq ($(CONFIG_MCOUNT),y) +quiet_cmd_nopg = CC $@ + cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@ + +$(obj)/mcount.o: alwayscc + $(call cmd,nopg) +alwayscc: + $(Q)rm -f $(obj)/mcount.o +endif ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/mcount.c 320-kcg/kernel/mcount.c
--- 310-irqbal_fast/kernel/mcount.c	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/kernel/mcount.c	2003-12-02 14:57:56.000000000 -0800
@@ -0,0 +1,243 @@
+/*
+ * kernel/mcount.c
+ *
+ * Implementation of kernel mcount handler and supporting functions.
+ *
+ * Code based on kernprof http://oss.sgi.com/projects/kernprof/
+ * Copyright (C) SGI 1999, 2000, 2001
+ * Written by Dimitris Michailidis (dimitris@engr.sgi.com)
+ * Modified by John Hawkes (hawkes@engr.sgi.com)
+ * Contributions from Niels Christiansen (nchr@us.ibm.com)
+ * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+void UNKNOWN_KERNEL(void) {} /* Dummy functions to make profiles more */
+void UNKNOWN_MODULE(void) {} /* descriptive */
+
+unsigned int mcount_shift, PC_resolution = DFL_PC_RES;
+
+char* memory_start = NULL;
+unsigned short *cg_from_base = NULL;
+struct cg_arc_dest *cg_to_base = NULL;
+int cg_arc_overflow = 0; /* set when no new arcs can be added to the call graph */
+int n_buckets = 0;
+size_t mem_needed; /* space needed for the call graph and the PC samples */
+extern char _stext, _etext, _sinittext, _einittext;
+
+void (*mcount_hook)(unsigned long, unsigned long) = NULL;
+struct proc_dir_entry *mcount_pde;
+
+/*
+ * Allocate and zero a single buffer laid out as: struct prof_mem_map header,
+ * one cg_from array per online cpu, then one cg_to array per online cpu.
+ * Fills in the header and the cg_*_base pointers.  Returns 0 or -ENOMEM.
+ */
+static int mcount_alloc_mem(void)
+{
+	unsigned long cg_from_size, cg_to_size;
+	size_t text_size = (unsigned long) &_etext - (unsigned long) &_stext;
+	struct prof_mem_map *memory_map;
+
+	for (mcount_shift = 0; (1 << mcount_shift) < PC_resolution; mcount_shift++);
+	n_buckets = text_size >> mcount_shift;
+	cg_from_size = n_buckets * sizeof(short);
+	cg_to_size = CG_MAX_ARCS * sizeof(struct cg_arc_dest);
+	mem_needed = sizeof(struct prof_mem_map) +
+		((cg_from_size + cg_to_size) * num_online_cpus());
+	if ((memory_start = vmalloc(mem_needed)) == NULL) {
+		return -ENOMEM;
+	}
+	memset(memory_start, 0, mem_needed);
+
+	cg_from_base = (unsigned short *) (memory_start + sizeof(struct prof_mem_map));
+	cg_to_base = (struct cg_arc_dest *) (memory_start + sizeof(struct prof_mem_map) +
+		(cg_from_size * num_online_cpus()));
+
+	memory_map = (struct prof_mem_map*) memory_start;
+	memory_map->kernel_buckets = n_buckets;
+	memory_map->nr_cpus = num_online_cpus();
+	memory_map->cg_from_size = cg_from_size;
+	memory_map->cg_to_size = cg_to_size;
+	memory_map->cg_to_offset = cg_from_size * num_online_cpus();
+	memory_map->kernel_start = (unsigned long)&_stext;
+	memory_map->kernel_end = (unsigned long)&_etext;
+	return 0;
+}
+
+/* Release the call graph buffer allocated by mcount_alloc_mem(). */
+static void mcount_free_mem(void)
+{
+	vfree(memory_start);
+	memory_start = NULL;
+}
+
+/*
+ * C entry point for arch mcount stubs (ppc64 _mcount branches here): hands
+ * the return addresses one and two frames up to mcount_hook, if one is set.
+ */
+void mcount_entry(void)
+{
+	unsigned long frompc, selfpc;
+
+	if(mcount_hook) {
+		frompc = (unsigned long)__builtin_return_address(2);
+		selfpc = (unsigned long)__builtin_return_address(1);
+		mcount_hook(frompc, selfpc);
+	}
+	return;
+}
+
+/* Record an arc traversal in the call graph. Called by mcount(). SMP safe */
+void cg_record_arc(unsigned long frompc, unsigned long selfpc)
+{
+	static spinlock_t cg_record_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long flags;
+	int toindex, fromindex, cpu;
+	unsigned short *q, *cg_from;
+	struct cg_arc_dest *p, *cg_to;
+
+	cpu = smp_processor_id();
+
+	cg_from = &cg_from_base[n_buckets * cpu];
+	cg_to = &cg_to_base[CG_MAX_ARCS * cpu];
+
+	if (pc_out_of_range(frompc))
+		fromindex = (FUNCTIONPC(UNKNOWN_KERNEL) - (unsigned long) &_stext)
+			>> mcount_shift;
+	else
+		fromindex = (frompc - (unsigned long) &_stext) >> mcount_shift;
+	q = &cg_from[fromindex];
+
+	/* Easy case: the arc is already in the call graph */
+	for (toindex = *q; toindex != 0; ) {
+		p = &cg_to[toindex];
+		if (p->address == selfpc) {
+			atomic_inc(&p->count);
+			return;
+		}
+		toindex = p->link;
+	}
+	/*
+	 * No luck. We need to add a new arc. Since cg_to[0] is unused,
+	 * we use cg_to[0].count to keep track of the next available arc.
+	 */
+	if (cg_arc_overflow) {
+		return;
+	}
+	toindex = atomic_add_return(1, &cg_to->count);
+	if (toindex >= CG_MAX_ARCS) {
+		/*
+		 * We have run out of space for arcs. We'll keep incrementing
+		 * the existing ones but we won't try to add any more.
+		 */
+		cg_arc_overflow = 1;
+		atomic_set(&cg_to->count, CG_MAX_ARCS - 1);
+		return;
+	}
+	/*
+	 * We have a secured slot for a new arc and all we need to do is
+	 * initialize it and add it to a hash bucket.  The bucket head is
+	 * updated under cg_record_lock with interrupts disabled.
+	 */
+	p = &cg_to[toindex];
+	p->address = selfpc;
+	atomic_set(&p->count, 1);
+
+	spin_lock_irqsave(&cg_record_lock, flags);
+	p->link = *q;
+	*q = toindex;
+	spin_unlock_irqrestore(&cg_record_lock, flags);
+	return;
+}
+
+/* Allocate the buffer on first use and install cg_record_arc as the hook. */
+int mcount_start(void)
+{
+	if (!memory_start) {
+		if(mcount_alloc_mem())
+			return -ENOMEM;
+		mcount_pde->size = mem_needed;
+	}
+	mcount_hook = cg_record_arc;
+	return 0;
+}
+
+/* Stop recording by clearing the hook; collected data is kept. */
+int mcount_stop(void)
+{
+	mcount_hook = NULL;
+	return 0;
+}
+
+/* Stop recording, set the proc entry size to 0 and free the buffer. */
+int mcount_cleanup(void)
+{
+	mcount_stop();
+	mcount_pde->size = 0;
+	mcount_free_mem();
+	return 0;
+}
+
+/*
+ * Copy up to count bytes of the raw call graph buffer, starting at *ppos,
+ * to user space.  Returns the number of bytes copied, 0 at or past the end
+ * of the data (or when no buffer is allocated), or -EFAULT if the user
+ * buffer cannot be written.
+ */
+ssize_t mcount_read(struct file * file, char * buf,
+			size_t count, loff_t *ppos)
+{
+	loff_t pos = *ppos;
+	size_t size = mcount_pde->size;
+
+	if (!memory_start || pos < 0 || pos >= size)
+		return 0;
+	/* Clamp without computing count + pos, which could wrap */
+	if (count > size - pos)
+		count = size - pos;
+	if (copy_to_user(buf, memory_start + pos, count))
+		return -EFAULT;
+	*ppos = pos + count;
+	return count;
+}
+
+/*
+ * Control interface: the first byte written selects the action --
+ * '0' stop and free all data, '1' stop profiling, '2' start profiling.
+ * Returns count on success, -EINVAL for an empty or unknown command,
+ * -EFAULT if buf cannot be read, or the error from the action itself.
+ */
+ssize_t mcount_write(struct file * file, const char * buf,
+			size_t count, loff_t *ppos)
+{
+	char cmd;
+	int ret;
+
+	if (count == 0)
+		return -EINVAL;
+	/* buf is a user-space pointer and must not be dereferenced directly */
+	if (get_user(cmd, buf))
+		return -EFAULT;
+
+	switch (cmd) {
+	case '0':
+		ret = mcount_cleanup();
+		break;
+	case '1':
+		ret = mcount_stop();
+		break;
+	case '2':
+		ret = mcount_start();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return (ret == 0) ? count : ret;
+}
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/Makefile 320-kcg/scripts/readcg-0.3/Makefile --- 310-irqbal_fast/scripts/readcg-0.3/Makefile 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/Makefile 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,43 @@ +###################################################### +# readcg - Makefile # +# A super-simplistic make process. Any help making # +# this better is welcomed. 
# +###################################################### + +CROSS_COMPILE = /usr +CC = $(CROSS_COMPILE)/bin/g++ +CFLAGS = -g -Wall + +# Uncomment this for ppc64 machines +# CFLAGS += -D__powerpc64__ + +CPP_FILES = call_graph.cpp functions.cpp raw_graph.cpp readcg.cpp system_map.cpp +OBJ_FILES = call_graph.o functions.o raw_graph.o main.o system_map.o +CLEAN_FILES = $(OBJ_FILES) readcg +INSTALLROOT = /usr + +all: readcg + +readcg: $(OBJ_FILES) + $(CC) $(OBJ_FILES) -o $@ + +call_graph.o: call_graph.cpp + $(CC) $(CFLAGS) -c $< + +functions.o: functions.cpp + $(CC) $(CFLAGS) -c $< + +raw_graph.o: raw_graph.cpp + $(CC) $(CFLAGS) -c $< + +main.o: main.cpp + $(CC) $(CFLAGS) -c $< + +system_map.o: system_map.cpp + $(CC) $(CFLAGS) -c $< + +install: + install -s -m 0755 readcg $(INSTALLROOT)/sbin + +clean: + rm -f $(CLEAN_FILES) diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/README 320-kcg/scripts/readcg-0.3/README --- 310-irqbal_fast/scripts/readcg-0.3/README 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/README 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,44 @@ +README - readcg-0.2 + +10-14-03: Initial Release +10-20-03: Code rewrite in C++ to improve readability and + flexibility. +10-30-03: Add support for 64-bit architectures (ppc64 for now). + +readcg is the utility for printing kernel call graphs. This +tool is in its early stages and most certainly has bugs. +Please email me to report bugs or to submit +patches. + + +KNOWN ISSUES: + +Some kernel functions call the same function from different +places. For this reason, seemingly duplicate entries can show +up in the call graph. There is some value in distinguishing +between these calls and I am currently working out the best +solution for displaying this case. Suggestions are welcome. + + +QUICK START: + +(1.) Obtain readcg-xx.tar.gz and kcg-X.X.X-V.patch from the lse + project page (www.sf.net/projects/lse). + +(2.) Patch your kernel and build it. 
Make sure 'Generate + function call graph' in the 'Kernel Hacking' section is + turned on. Install your kernel and boot it. + +(3.) Unpack readcg and descend into the readcg directory. + Edit the Makefile. Set CROSS_COMPILE to find the correct + g++ (the default should work for most people). If you + will be generating call graphs from a 64-bit kernel, + uncomment the line documented in the Makefile. Build the + tool by typing 'make'. + +(4.) To generate a call graph do the following: + readcg -c # Clear counters + readcg -e # Enable profiling + + readcg -d # Disable profiling + readcg -m # Generate call graph diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/call_graph.cpp 320-kcg/scripts/readcg-0.3/call_graph.cpp --- 310-irqbal_fast/scripts/readcg-0.3/call_graph.cpp 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/call_graph.cpp 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,103 @@ +#include +#include +#include +#include "call_graph.h" +#include "functions.h" + +call_graph::call_graph(string _graph_file, string map_file) +{ + graph_file = _graph_file; + raw_data = raw_graph(graph_file); + a2n = system_map(map_file); + parse_raw_graph(); +} + +void call_graph::cmd(const char c) +{ + ofstream out (graph_file.c_str()); + + if (!out.is_open()) { + perror(graph_file.c_str()); + exit(1); + } + + out.put(c); + out.close(); + return; +} + + +void call_graph::parse_raw_graph() +{ + unsigned long findex; + unsigned short from_bucket; + cg_arc_dest *to_bucket; + src_function src; + dest_function dest; + kaddress_t src_addr, dest_addr; + + for (findex = 0; findex < raw_data.get_num_buckets(); ++findex) { + src_addr = raw_data.get_from_address(findex); + from_bucket = raw_data.get_from_bucket(0, findex); + to_bucket = raw_data.get_to_bucket(0, from_bucket); + while (to_bucket) { + dest_addr = to_bucket->address; + src = src_function(a2n.lookup(src_addr)); + dest = dest_function(a2n.lookup(dest_addr), to_bucket->count); + insert(src, 
dest); + to_bucket = raw_data.get_to_bucket(0, to_bucket->link); + } + } + sort_graph(mapping); + sort_graph(rmapping); +} + +void call_graph::insert(src_function src, dest_function dest) +{ + vector *to_list; + src_function rsrc; + dest_function rdest; + + to_list = &mapping[src]; + to_list->insert(to_list->end(), dest); + + rsrc = src_function(dest.name); + rdest = dest_function(src.name, dest.count); + to_list = &rmapping[rsrc]; + to_list->insert(to_list->end(), rdest); +} + +void call_graph::sort_graph(map > &mapping) +{ + map >::iterator it; + + for (it = mapping.begin(); it != mapping.end(); ++it) + sort(it->second.begin(), it->second.end()); +} + +void call_graph::print_vec(vector vec) const +{ + unsigned int i; + string prev_name = ""; + for (i = 0; i < vec.size(); ++i) + printf(" %10u %s\n", vec[i].count, vec[i].name.c_str()); +} + +void call_graph::print() const +{ + map >::const_iterator it; + map >::const_iterator rit; + vector callee_list; + vector caller_list; + + for (it = mapping.begin(); it != mapping.end(); it++) { + callee_list = it->second; + rit = rmapping.find(it->first); + caller_list = rit->second; + + printf("==================================================\n"); + print_vec(caller_list); + printf("%s\n", it->first.name.c_str()); + print_vec(callee_list); + } +} diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/call_graph.h 320-kcg/scripts/readcg-0.3/call_graph.h --- 310-irqbal_fast/scripts/readcg-0.3/call_graph.h 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/call_graph.h 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,77 @@ +#ifndef __CALL_GRAPH_H +#define __CALL_GRAPH_H + +#include +#include +#include "readcg.h" +#include "functions.h" +#include "raw_graph.h" +#include "system_map.h" +using namespace std; + +#define CMD_CLEAR '0' +#define CMD_DISABLE '1' +#define CMD_ENABLE '2' + +/* + * A container for the object -- making the data easier to work with + */ +class call_graph { +private: + map 
> mapping; + map > rmapping; + string graph_file; + raw_graph raw_data; + system_map a2n; + + /* + * Create mapping and rmapping from raw data + */ + void parse_raw_graph(); + + /* + * Sort all vectors in a mapping + */ + void sort_graph(map > &mapping); + + /* + * Insert a function call into both msppings + */ + void insert(src_function src, dest_function dest); + + /* + * Print out the given vector + */ + void print_vec(vector vec) const; + + /* + * Send a command to kcg through the /proc interface + */ + void cmd(const char c); +public: + call_graph() { } + call_graph(string _graph_file) { graph_file = _graph_file; } + call_graph(string _graph_file, string map_file); + + /* + * Print a call graph report + */ + void print() const; + + /* + * Stop profiling and reset all data + */ + void clear() { cmd(CMD_CLEAR); } + + /* + * Start profiling + */ + void enable() { cmd(CMD_ENABLE); } + + /* + * Stop profiling + */ + void disable() { cmd(CMD_DISABLE); } +}; + +#endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/functions.cpp 320-kcg/scripts/readcg-0.3/functions.cpp --- 310-irqbal_fast/scripts/readcg-0.3/functions.cpp 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/functions.cpp 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,22 @@ +#include "functions.h" + +bool src_function::operator<(const src_function &RHS) const +{ + return name < RHS.name; +} + + +dest_function::dest_function(string _name, unsigned int _count) +{ + name = _name; + count = _count; +} + +bool dest_function::operator<(const dest_function &RHS) const +{ + if (count > RHS.count) + return true; + if ((count == RHS.count) && (name < RHS.name)) + return true; + return false; +} diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/functions.h 320-kcg/scripts/readcg-0.3/functions.h --- 310-irqbal_fast/scripts/readcg-0.3/functions.h 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/functions.h 2003-12-02 
14:57:56.000000000 -0800 @@ -0,0 +1,29 @@ +#ifndef __FUNCTIONS_H +#define __FUNCTIONS_H + +#include +using namespace std; + +class src_function { +private: + +public: + string name; + src_function() { name = ""; } + src_function(string _name) { name = _name; } + bool operator<(const src_function &RHS) const; +}; + +class dest_function { +private: + +public: + string name; + unsigned int count; + dest_function() { name = ""; count = 0; } + dest_function(string _name) { name = _name; } + dest_function(string _name, unsigned int _count); + bool operator<(const dest_function &RHS) const; +}; + +#endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/main.cpp 320-kcg/scripts/readcg-0.3/main.cpp --- 310-irqbal_fast/scripts/readcg-0.3/main.cpp 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/main.cpp 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "readcg.h" +#include "call_graph.h" +using namespace std; + +#define CG_FILE "/proc/mcount" +#define MAP_FILE "System.map" + +void usage(char *prgname) +{ + cerr << "Usage: " << prgname << " [ -e | -d | -c | -m ]\n"; + cerr << "\t-m Print profile\n"; + cerr << "\t-e Enable profiler\n"; + cerr << "\t-d Disable profiler\n"; + cerr << "\t-c Clear profile data\n"; + exit(1); +} + +int main(int argc, char **argv) +{ + int c; + char *mapfile = NULL; + call_graph kcg; + bool print_flag = false, cmd_flag = false; + char cmd; + + while ((c = getopt(argc, argv, "edcm:")) != -1) { + switch(c) { + case 'm': + mapfile = optarg; + print_flag = true; + break; + case 'e': + cmd = CMD_ENABLE; + cmd_flag = true; + break; + case 'd': + cmd = CMD_DISABLE; + cmd_flag = true; + break; + case 'c': + cmd = CMD_CLEAR; + cmd_flag = true; + break; + default: + usage(argv[0]); + } + } + + if (print_flag) { + kcg = call_graph(CG_FILE, mapfile); + kcg.print(); + } else if (cmd_flag) { + kcg = 
call_graph(CG_FILE); + switch(cmd) { + case CMD_ENABLE: + kcg.enable(); + break; + case CMD_DISABLE: + kcg.disable(); + break; + case CMD_CLEAR: + kcg.clear(); + } + } else { + usage(argv[0]); + } + return 0; +}
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/raw_graph.cpp 320-kcg/scripts/readcg-0.3/raw_graph.cpp
--- 310-irqbal_fast/scripts/readcg-0.3/raw_graph.cpp	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.3/raw_graph.cpp	2003-12-02 14:57:56.000000000 -0800
@@ -0,0 +1,155 @@
+#include
+#include
+#include "raw_graph.h"
+
+raw_graph::raw_graph(const string filename) {
+	graph_file = filename;
+	read();
+	merge();
+}
+
+void raw_graph::read()
+{
+	ifstream file;
+	long size;
+
+	file.open(graph_file.c_str(), ios::in|ios::binary|ios::ate);
+
+	if (!file.is_open()) {
+		perror(graph_file.c_str());
+		exit(1);
+	}
+
+	size = file.tellg();
+	file.seekg(0, ios::beg);
+
+	// A file shorter than the header holds no profile data, and
+	// size - sizeof(graph_header) below would wrap to a huge value.
+	if (size < (long) sizeof(graph_header)) {
+		fprintf(stderr, "%s: no call graph data\n",
+			graph_file.c_str());
+		exit(1);
+	}
+
+	// Get file header
+	file.read((char*) &header, sizeof(graph_header));
+
+	// Get raw data
+	data = new char[size - sizeof(graph_header)];
+	file.read(data, size - sizeof(graph_header));
+
+	// new throws rather than returning NULL, so check the reads instead
+	if (!file) {
+		fprintf(stderr, "%s: short read\n",
+			graph_file.c_str());
+		exit(1);
+	}
+	file.close();
+}
+
+kaddress_t raw_graph::get_from_address(unsigned long index) {
+	return header.kernel_start + index * PC_RES;
+}
+
+unsigned short raw_graph::get_from_bucket(int cpu, unsigned long index) {
+	unsigned short *from_segment; /* first bucket on this cpu */
+
+	if (index >= header.kernel_buckets)
+		return 0;
+	from_segment = (unsigned short *)\
+		(data + cpu * header.cg_from_size);
+	return from_segment[index];
+}
+
+void raw_graph::set_from_bucket(int cpu, unsigned long index, unsigned short val)
+{
+	unsigned short *from_segment; /* first bucket on this cpu */
+
+	if (index >= header.kernel_buckets)
+		return;
+	from_segment = (unsigned short *)\
+		(data + cpu * header.cg_from_size);
+	from_segment[index] = val;
+	return;
+}
+
+cg_arc_dest *raw_graph::get_to_bucket(int cpu, int index) {
+	cg_arc_dest *to_segment; /* first bucket on this cpu */
+
+	if (index == 0) // bucket 0 is reserved
+		return NULL; // use get_to_count()
+	to_segment = (cg_arc_dest *)\
+		(data + header.cg_to_offset + cpu * header.cg_to_size);
+	return &to_segment[index];
+}
+
+int raw_graph::get_to_count(int cpu) {
+	cg_arc_dest *to_segment; /* first bucket on this cpu */
+
+	to_segment = (cg_arc_dest *)\
+		(data + header.cg_to_offset + cpu * header.cg_to_size);
+	return to_segment[0].count;
+}
+
+void raw_graph::set_to_count(int cpu, int val) {
+	cg_arc_dest *to_segment; /* first bucket on this cpu */
+
+	to_segment = (cg_arc_dest *)\
+		(data + header.cg_to_offset + cpu * header.cg_to_size);
+	to_segment[0].count = val;
+	return;
+}
+
+cg_arc_dest *raw_graph::get_free_to_bucket(int cpu) {
+	int to_count;
+
+	to_count = get_to_count(cpu);
+	// The count is the last index handed out and the highest valid
+	// index is CG_MAX_ARCS - 1, so refuse once that one is in use.
+	if (to_count >= CG_MAX_ARCS - 1)
+		return NULL;
+	else {
+		set_to_count(cpu, ++to_count);
+		return get_to_bucket(cpu, to_count);
+	}
+}
+
+void raw_graph::combine_to_entries(const unsigned long findex, int cpu) {
+	cg_arc_dest *src, *base, *dst;
+
+	src = get_to_bucket(cpu, get_from_bucket(cpu, findex));
+
+	while (src) {
+		base = get_to_bucket(0, get_from_bucket(0, findex));
+		dst = base;
+
+		/* Try to find a match on cpu 0 */
+		while (dst) {
+			if (dst->address == src->address) {
+				dst->count += src->count;
+				break;
+			}
+			dst = get_to_bucket(0, dst->link);
+		}
+		/* No luck, try to add a new bucket */
+		if (!dst) {
+			dst = get_free_to_bucket(0);
+			if (dst) {
+				dst->link = get_from_bucket(0, findex);
+				dst->address = src->address;
+				dst->count = src->count;
+				set_from_bucket(0, findex, get_to_count(0));
+			}
+		}
+		src = get_to_bucket(cpu, src->link);
+	}
+}
+
+void raw_graph::merge()
+{
+	unsigned long findex, cpu;
+
+	for (findex = 0; findex < header.kernel_buckets; ++findex)
+		for(cpu = 1; cpu < header.nr_cpus; ++cpu)
+			combine_to_entries(findex, cpu);
+}
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/raw_graph.h
320-kcg/scripts/readcg-0.3/raw_graph.h --- 310-irqbal_fast/scripts/readcg-0.3/raw_graph.h 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/scripts/readcg-0.3/raw_graph.h 2003-12-02 14:57:56.000000000 -0800 @@ -0,0 +1,138 @@ +#ifndef __RAW_GRAPH_H +#define __RAW_GRAPH_H + +#include "readcg.h" +#include +using namespace std; + +#define PC_RES 4 +#define CG_MAX_ARCS (1 << (8 * sizeof(short))) + + +struct graph_header { + /* Number of from_buckets needed to cover kernel text */ + kaddress_t kernel_buckets; + + /* Number of cpus profiled */ + kaddress_t nr_cpus; + + /* Bytes needed for from_buckets on one cpu */ + kaddress_t cg_from_size; + + /* Bytes needed for to_buckets on one cpu */ + kaddress_t cg_to_size; + + /* offset into file where first to_bucket starts */ + kaddress_t cg_to_offset; + + /* start address of kernel text */ + kaddress_t kernel_start; + + /* first address after end of kernel text */ + kaddress_t kernel_end; +}; + +struct cg_arc_dest { + kaddress_t address; + int count; + unsigned short link; + unsigned short pad; +}; + +/******************************************************************** + * raw_graph - representation of the data as stored in /proc/mcount * + * The data format is fairly complex and almost impossible to * + * figure out from the code so hopefully the following diagram will * + * help explain the format. * + * * + * /---------------------------------\ ... * + * | | F = Offset into * + * |T T->T T->T | CPU 1 array of T's * + * |I T T->T T->T T T->T->T->T | for on same CPU * + * |---------------------------------| * + * | | I = A 'T' used to * + * | | CPU 0 hold index of * + * |I T->T->T T T T->T T T T | next free slot * + * |---------------------------------| * + * | .... | ... 
T = cg_arc_dest:      *
+ *  |---------------------------------|           'to' address and  *
+ *  |FF F FFF F F F FFFFFFF           | CPU 1     number of calls   *
+ *  |---------------------------------|                             *
+ *  |FFF F FFF FFFFFF FFF F FFF       | CPU 0 The offset into the   *
+ *  |---------------------------------|       array of 'Fs'         *
+ *  |          graph_header           |       translates into the   *
+ *  \---------------------------------/       from address          *
+ *                                                                  *
+ *  A per-cpu hash table in a buffer          'Ts' belonging to     *
+ *                                            same 'F' are linked   *
+ ********************************************************************/
+
+class raw_graph {
+private:
+	graph_header header;	/* The first part is the header */
+	char* data;		/* Store the rest of the buffer */
+	string graph_file;	/* filename of raw graph */
+
+	/*
+	 * Combine data from all CPUs into the CPU0 part of the table
+	 */
+	void merge();
+
+	/*
+	 * Combine the hash buckets pointed to by findex on a cpu
+	 * with the buckets at findex on CPU 0
+	 */
+	void combine_to_entries(const unsigned long findex, int cpu);
+
+	/*
+	 * Return a pointer to the next available to_bucket on a cpu
+	 */
+	cg_arc_dest *get_free_to_bucket(int cpu);
+
+	/*
+	 * Return the number of to_buckets in use on a cpu
+	 */
+	int get_to_count(int cpu);
+
+	/*
+	 * Set the number of to_buckets in use on a cpu
+	 */
+	void set_to_count(int cpu, int val);
+
+public:
+	raw_graph() {}
+	raw_graph(const string filename);
+
+	/*
+	 * Read in a raw graph from the file 'graph_file'
+	 */
+	void read();
+
+	/*
+	 * Return the total number of from buckets needed to cover the
+	 * kernel text.
+	 */
+	unsigned long get_num_buckets() { return header.kernel_buckets; }
+
+	/*
+	 * Return the to_bucket on a cpu given an index from a from_bucket
+	 */
+	cg_arc_dest *get_to_bucket(int cpu, int index);
+
+	/*
+	 * Get the value of a from_bucket on a cpu
+	 */
+	unsigned short get_from_bucket(int cpu, unsigned long index);
+
+	/*
+	 * Set the value of a from_bucket on a cpu
+	 */
+	void set_from_bucket(int cpu, unsigned long index, unsigned short val);
+
+	/*
+	 * Given an index, calculate the approximate from address in kernel text
+	 */
+	kaddress_t get_from_address(unsigned long index);
+};
+
+#endif
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/readcg.h 320-kcg/scripts/readcg-0.3/readcg.h
--- 310-irqbal_fast/scripts/readcg-0.3/readcg.h	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.3/readcg.h	2003-12-02 14:57:56.000000000 -0800
@@ -0,0 +1,10 @@
+#ifndef __READCG_H
+#define __READCG_H
+
+#if defined(__powerpc64__)
+typedef unsigned long long kaddress_t;
+#else
+typedef unsigned long kaddress_t;
+#endif
+
+#endif
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/system_map.cpp 320-kcg/scripts/readcg-0.3/system_map.cpp
--- 310-irqbal_fast/scripts/readcg-0.3/system_map.cpp	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.3/system_map.cpp	2003-12-02 14:57:56.000000000 -0800
@@ -0,0 +1,59 @@
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include "system_map.h"
+
+/*
+ * Append the entries of a System.map file whose type is T, t, W or w
+ * to 'mapping', in file order.  Exits with status 1 if the file cannot
+ * be opened.  Lines that do not parse as "<hex address> <type> <name>"
+ * are skipped.
+ */
+void system_map::read(const string filename)
+{
+	ifstream mapfile(filename.c_str());
+	string line;
+
+	if (!mapfile.is_open()) {
+		perror(filename.c_str());
+		exit(1);
+	}
+
+	/* Loop on the read itself so a failed read is never processed */
+	while (getline(mapfile, line)) {
+		istringstream fields(line);
+		kaddress_t address;
+		string mode, name;
+
+		/* std::string fields cannot overflow like fixed char buffers */
+		if (!(fields >> hex >> address >> mode >> name))
+			continue;
+		if (mode[0] != 'T' && mode[0] != 't' &&
+		    mode[0] != 'W' && mode[0] != 'w')
+			continue;
+		mapping.push_back(make_pair(address, name));
+	}
+}
+
+string
system_map::lookup(const kaddress_t address)
+{
+	unsigned long low = 0, high = mapping.size(), mid;
+
+	/* Half-open search: find the first entry above 'address' */
+	while (low < high) {
+		mid = low + (high - low) / 2;
+		if (mapping[mid].first <= address)
+			low = mid + 1;
+		else
+			high = mid;
+	}
+	/* Nothing at or below 'address': no match */
+	if (low == 0)
+		return "";
+	/* The last entry has no successor; it only matches exactly */
+	if (low == mapping.size() && mapping[low - 1].first != address)
+		return "";
+	return mapping[low - 1].second;
+}
+
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.3/system_map.h 320-kcg/scripts/readcg-0.3/system_map.h
--- 310-irqbal_fast/scripts/readcg-0.3/system_map.h	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.3/system_map.h	2003-12-02 14:57:56.000000000 -0800
@@ -0,0 +1,42 @@
+#ifndef __SYSTEM_MAP_H
+#define __SYSTEM_MAP_H
+
+#include <string>
+#include <vector>
+#include "readcg.h"
+using namespace std;
+
+#define LINE_LEN 100
+#define NAME_LEN 35
+
+#ifdef __powerpc64__
+#define MAP_FMT "%llx %s %s"
+#else
+#define MAP_FMT "%lx %s %s"
+#endif
+
+/*
+ * Mapping of kernel text addresses to function names
+ */
+class system_map {
+private:
+	vector<pair<kaddress_t, string> > mapping;
+
+	/*
+	 * Read data from a System.map file
+	 */
+	void read(const string filename);
+public:
+	system_map() { }
+	system_map(const string filename) { read(filename); }
+
+	/*
+	 * Return a function name corresponding to the given address
+	 * A match is defined as: map[n] <= addr && map[n+1] > addr,
+	 * or map[n] == addr.  Returns "" when nothing matches.
+	 * The mapping must be sorted by ascending address.
+	 */
+	string lookup(const kaddress_t address);
+};
+
+#endif