From: David Mosberger This is a modified and slightly expanded version of Roland's earlier patch (see http://marc.theaimsgroup.com/?l=linux-kernel&m=106551701731039). What's modified is that I added a gate_map() macro which evaluates to NULL on platforms that don't use the gate DSO. With that, the new code in task_mmu.c will get optimized away without any ugly #ifdefs. The other difference is that I left vm_flags at 0, rather than turning on VM_READ+VM_EXEC. The reason is that I wanted to discourage apps from trying to read the fix-map area directly, because that may or may not work (e.g., on ia64, the executable portion of the gate-DSO is not readable). Roland, if this is a problem for gdb's gcore command, we need to find a better solution. What's new is that I added a sysctl that allows setting the path of the gate DSO so that user apps relying on /proc/PID/maps can read the DSO contents just like any other file. By default, the path is the empty string. The idea here is that there would be an init script which dumps the kernel's gate DSO to a file and then registers the path to that file via /proc/sys/kernel/gate_dso. I considered several other options, but this seems to me the most lightweight and least intrusive solution to the problem. Perhaps for 2.7.x the whole issue of special kernel mappings could be revisited. 
Here is a concrete example of how this works with a simple "bt" test-program which generates a stack trace that goes across a signal-handler: $ ./bt # (no gate DSO registered) 4000000000001210 40000000000017c0 a0000000000207e0 a000000000020641 2000000800183ca0 4000000000001c40 200000080015c9d0 <__libc_start_main+0x3e0> 4000000000001000 <_start+0x80> $ echo /boot/gate.so > /proc/sys/kernel/gate_dso $ ./bt 4000000000001210 40000000000017c0 a0000000000207e0 <__kernel_sigtramp+0xc0> a000000000020641 <__kernel_syscall_via_break+0x1> 2000000800183ca0 4000000000001990 200000080015c9d0 <__libc_start_main+0x3e0> 4000000000001000 <_start+0x80> Note that with the gate-DSO path in /proc/PID/maps, one process can unwind another process and still get the correct output. fs/proc/task_mmu.c | 31 ++++++++++++++++++++++++++++--- include/linux/sysctl.h | 1 + kernel/sysctl.c | 15 +++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff -puN fs/proc/task_mmu.c~fixmap-in-proc-pid-maps-ng fs/proc/task_mmu.c --- 25/fs/proc/task_mmu.c~fixmap-in-proc-pid-maps-ng 2003-12-14 22:31:10.000000000 -0800 +++ 25-akpm/fs/proc/task_mmu.c 2003-12-14 22:31:10.000000000 -0800 @@ -1,6 +1,7 @@ #include #include #include +#include #include char *task_mem(struct mm_struct *mm, char *buffer) @@ -75,6 +76,23 @@ int task_statm(struct mm_struct *mm, int return size; } +#ifdef AT_SYSINFO_EHDR + +char gate_dso_path[256] = ""; +static struct vm_area_struct gate_vmarea = { + /* Do _not_ mark this area as readable, cuz not the entire range may be readable + (e.g., due to execute-only pages or holes) and the tools that read + /proc/PID/maps should read the interesting bits from the gate-DSO file + instead. 
*/ + .vm_start = FIXADDR_USER_START, + .vm_end = FIXADDR_USER_END +}; + +# define gate_map() &gate_vmarea +#else +# define gate_map() NULL +#endif + static int show_map(struct seq_file *m, void *v) { struct vm_area_struct *map = v; @@ -100,12 +118,15 @@ static int show_map(struct seq_file *m, map->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); - if (map->vm_file) { + if (map->vm_file || map == gate_map()) { len = 25 + sizeof(void*) * 6 - len; if (len < 1) len = 1; seq_printf(m, "%*c", len, ' '); - seq_path(m, file->f_vfsmnt, file->f_dentry, " \t\n\\"); + if (map == gate_map()) + seq_printf (m, "%s", gate_dso_path); + else + seq_path(m, file->f_vfsmnt, file->f_dentry, " \t\n\\"); } seq_putc(m, '\n'); return 0; @@ -128,6 +149,8 @@ static void *m_start(struct seq_file *m, if (!map) { up_read(&mm->mmap_sem); mmput(mm); + if (l == -1) + map = gate_map(); } return map; } @@ -135,7 +158,7 @@ static void *m_start(struct seq_file *m, static void m_stop(struct seq_file *m, void *v) { struct vm_area_struct *map = v; - if (map) { + if (map && map != gate_map()) { struct mm_struct *mm = map->vm_mm; up_read(&mm->mmap_sem); mmput(mm); @@ -149,6 +172,8 @@ static void *m_next(struct seq_file *m, if (map->vm_next) return map->vm_next; m_stop(m, v); + if (map != gate_map()) + return gate_map(); return NULL; } diff -puN include/linux/sysctl.h~fixmap-in-proc-pid-maps-ng include/linux/sysctl.h --- 25/include/linux/sysctl.h~fixmap-in-proc-pid-maps-ng 2003-12-14 22:31:10.000000000 -0800 +++ 25-akpm/include/linux/sysctl.h 2003-12-14 22:31:10.000000000 -0800 @@ -127,6 +127,7 @@ enum KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ + KERN_GATE_DSO=60, /* string: path to gate DSO file */ }; diff -puN kernel/sysctl.c~fixmap-in-proc-pid-maps-ng kernel/sysctl.c --- 25/kernel/sysctl.c~fixmap-in-proc-pid-maps-ng 2003-12-14 22:31:10.000000000 -0800 +++ 
25-akpm/kernel/sysctl.c 2003-12-14 22:31:10.000000000 -0800 @@ -37,6 +37,7 @@ #include #include #include +#include #include #ifdef CONFIG_ROOT_NFS @@ -65,6 +66,9 @@ extern int min_free_kbytes; static int maxolduid = 65535; static int minolduid; +#ifdef AT_SYSINFO_EHDR +extern char gate_dso_path[]; +#endif #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif @@ -395,6 +399,17 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif +#ifdef AT_SYSINFO_EHDR + { + .ctl_name = KERN_GATE_DSO, + .procname = "gate_dso", + .data = &gate_dso_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, +#endif #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, _