From: Prasanna Meda This patch uses find_vma() to improve the read response of /proc/pid/maps. It attempts to do a linear scan instead of a quadratic walk, and to utilise the rb tree. Reading the file was doing a sequential scan from the beginning to the file position every time, and taking quite a long time. The improvements come from f_version/m_version and the resulting mmap_cache match. Even if mmap_cache does not match, the rb tree walk should be faster than a sequential walk. The first attempt was to put the state across read system calls into private data. Later I got inspiration from wli's pid patch using f_version in readdir of /proc. Another advantage is that f_version will be cleared automatically by lseek. The test program creates 32K maps and splits them into two (limited by max_map_count sysctl) using mprotect(0). After the patch, the read time improves from many seconds to milliseconds, and does not grow superlinearly with the number of read calls. Peter Swain helped with the idea and testing. After the patch: Reading /proc/self/maps:65528 time: 0 secs and 780728 usecs buf:4096 bytes:3811362 Reading /proc/self/maps:65528 time: 1 secs and 117573 usecs buf:1024 bytes:3866627 Reading /proc/self/maps:65528 time: 0 secs and 473459 usecs buf: 256 bytes:3866627 Reading /proc/self/maps:65528 time: 0 secs and 901288 usecs buf: 64 bytes:3866627 Reading /proc/self/maps:65528 time: 1 secs and 480185 usecs buf: 16 bytes:3866627 Reading /proc/self/maps:65528 time: 1 secs and 636268 usecs buf: 4 bytes:3866627 Reading /proc/self/maps:65528 time: 4 secs and 118327 usecs buf: 1 bytes:3866627 Before the patch: Reading /proc/self/maps:65528 time: 4 secs and 359556 usecs buf:4096 bytes:3866647 Reading /proc/self/maps:65528 time:16 secs and 218584 usecs buf:1024 bytes:3866688 Reading /proc/self/maps:65528 time:67 secs and 870200 usecs buf: 256 bytes:3866688 Reading /proc/self/maps:65528 time:255 secs and 186934 usecs buf: 64 bytes:3866688 Small reads never completed. 
Signed-off-by: Prasanna Meda Signed-off-by: Andrew Morton --- 25-akpm/fs/proc/task_mmu.c | 55 ++++++++++++++++++++++++++++++--------- 25-akpm/fs/seq_file.c | 20 ++++++++++++++ 25-akpm/include/linux/seq_file.h | 1 3 files changed, 64 insertions(+), 12 deletions(-) diff -puN fs/proc/task_mmu.c~speedup-proc-pid-maps fs/proc/task_mmu.c --- 25/fs/proc/task_mmu.c~speedup-proc-pid-maps Tue Jan 11 14:39:53 2005 +++ 25-akpm/fs/proc/task_mmu.c Tue Jan 11 14:39:53 2005 @@ -116,24 +116,52 @@ static int show_map(struct seq_file *m, static void *m_start(struct seq_file *m, loff_t *pos) { struct task_struct *task = m->private; - struct mm_struct *mm = get_task_mm(task); - struct vm_area_struct * map; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *map, *tail_map; loff_t l = *pos; - if (!mm) + if (last_addr == -1UL) return NULL; + mm = get_task_mm(task); + if (!mm) { + m->version = -1UL; + return NULL; + } + + tail_map = get_gate_vma(task); down_read(&mm->mmap_sem); - map = mm->mmap; - while (l-- && map) + + /* Start with last addr hint */ + map = find_vma(mm, last_addr); + if (map) { map = map->vm_next; - if (!map) { - up_read(&mm->mmap_sem); - mmput(mm); - if (l == -1) - map = get_gate_vma(task); + goto out; } - return map; + + /* Check the map index is within the range */ + if ((unsigned long)l < mm->map_count) { + map = mm->mmap; + while (l-- && map) + map = map->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_map = NULL; /* After gate map */ + +out: + if (map && map != tail_map) { + m->version = map->vm_start; + return map; + } + + /* End of maps has reached */ + m->version = -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_map; } static void m_stop(struct seq_file *m, void *v) @@ -152,9 +180,12 @@ static void *m_next(struct seq_file *m, struct task_struct *task = m->private; struct vm_area_struct *map = v; (*pos)++; - if (map->vm_next) + if (map && map->vm_next) { + m->version = map->vm_next->vm_start; return 
map->vm_next; + } m_stop(m, v); + m->version = -1UL; if (map != get_gate_vma(task)) return get_gate_vma(task); return NULL; diff -puN fs/seq_file.c~speedup-proc-pid-maps fs/seq_file.c --- 25/fs/seq_file.c~speedup-proc-pid-maps Tue Jan 11 14:39:53 2005 +++ 25-akpm/fs/seq_file.c Tue Jan 11 14:39:53 2005 @@ -36,6 +36,13 @@ int seq_open(struct file *file, struct s p->op = op; file->private_data = p; + /* + * Wrappers around seq_open(e.g. swaps_open) need to be + * aware of this. If they set f_version themselves, they + * should call seq_open first and then set f_version. + */ + file->f_version = 0; + /* SEQ files support lseek, but not pread/pwrite */ file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); return 0; @@ -58,6 +65,18 @@ ssize_t seq_read(struct file *file, char int err = 0; down(&m->sem); + /* + * seq_file->op->..m_start/m_stop/m_next may do special actions + * or optimisations based on the file->f_version, so we want to + * pass the file->f_version to those methods. + * + * seq_file->version is just copy of f_version, and seq_file + * methods can treat it simply as file version. + * It is copied in first and copied out after all operations. + * It is convenient to have it as part of structure to avoid the + * need of passing another argument to all the seq_file methods. + */ + m->version = file->f_version; /* grab buffer if we didn't have one */ if (!m->buf) { m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); @@ -136,6 +155,7 @@ Done: copied = err; else *ppos += copied; + file->f_version = m->version; up(&m->sem); return copied; Enomem: diff -puN include/linux/seq_file.h~speedup-proc-pid-maps include/linux/seq_file.h --- 25/include/linux/seq_file.h~speedup-proc-pid-maps Tue Jan 11 14:39:53 2005 +++ 25-akpm/include/linux/seq_file.h Tue Jan 11 14:39:53 2005 @@ -18,6 +18,7 @@ struct seq_file { size_t from; size_t count; loff_t index; + loff_t version; struct semaphore sem; struct seq_operations *op; void *private; _