From: David Mosberger Below is a warmed-up version of a patch originally done by Werner Almesberger (see http://tinyurl.com/25zra) to replace the MAX_MAP_COUNT limit with a sysctl variable. I thought this had gone into the tree a long time ago, but alas it has not and, as luck would have it, the hard limit bit someone today once again with a large app on a large machine. Here is a small test app: ----------------------------------------------------------------- #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> int main (int argc, char **argv) { long n = 0; printf ("Starting mmap test...\n"); while (1) if (mmap (0, 1, (n++ & 1) ? PROT_READ : PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0) == MAP_FAILED) { printf ("Failed after %ld successful maps\n", n - 1); exit (0); } return -1; } ----------------------------------------------------------------- After applying the patch, I was able to do 20,000,000 successful maps while consuming about 3.6GB of memory (~180 bytes/map), which matches well enough with the actual vm_area_struct size of 128 bytes. If I set the max_map_count insanely high, the above test keeps running until we're out of memory and then the OOM killer kicks in. Basically: no surprises. 
--- 25-akpm/Documentation/sysctl/vm.txt | 16 ++++++++++++++++ 25-akpm/include/linux/sched.h | 4 +++- 25-akpm/include/linux/sysctl.h | 1 + 25-akpm/kernel/sysctl.c | 8 ++++++++ 25-akpm/mm/mmap.c | 8 +++++--- 5 files changed, 33 insertions(+), 4 deletions(-) diff -puN Documentation/sysctl/vm.txt~max_map_count-sysctl Documentation/sysctl/vm.txt --- 25/Documentation/sysctl/vm.txt~max_map_count-sysctl 2004-03-31 23:29:10.725806504 -0800 +++ 25-akpm/Documentation/sysctl/vm.txt 2004-03-31 23:29:10.735804984 -0800 @@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/ - dirty_background_ratio - dirty_expire_centisecs - dirty_writeback_centisecs +- max_map_count - min_free_kbytes ============================================================== @@ -77,6 +78,21 @@ for swap because we only cluster swap da ============================================================== +max_map_count: + +This file contains the maximum number of memory map areas a process +may have. Memory map areas are used as a side-effect of calling +malloc, directly by mmap and mprotect, and also when loading shared +libraries. + +While most applications need less than a thousand maps, certain +programs, particularly malloc debuggers, may consume lots of them, +e.g., up to one or two maps per allocation. + +The default value is 65536. + +============================================================== + min_free_kbytes: This is used to force the Linux VM to keep a minimum number diff -puN include/linux/sched.h~max_map_count-sysctl include/linux/sched.h --- 25/include/linux/sched.h~max_map_count-sysctl 2004-03-31 23:29:10.726806352 -0800 +++ 25-akpm/include/linux/sched.h 2004-03-31 23:29:10.736804832 -0800 @@ -180,7 +180,9 @@ asmlinkage void schedule(void); struct namespace; /* Maximum number of active map areas.. 
This is a random (large) number */ -#define MAX_MAP_COUNT (65536) +#define DEFAULT_MAX_MAP_COUNT 65536 + +extern int sysctl_max_map_count; #include diff -puN include/linux/sysctl.h~max_map_count-sysctl include/linux/sysctl.h --- 25/include/linux/sysctl.h~max_map_count-sysctl 2004-03-31 23:29:10.727806200 -0800 +++ 25-akpm/include/linux/sysctl.h 2004-03-31 23:29:10.736804832 -0800 @@ -158,6 +158,7 @@ enum VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ VM_LOWER_ZONE_PROTECTION=20,/* Amount of protection of lower zones */ VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ + VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ }; diff -puN kernel/sysctl.c~max_map_count-sysctl kernel/sysctl.c --- 25/kernel/sysctl.c~max_map_count-sysctl 2004-03-31 23:29:10.729805896 -0800 +++ 25-akpm/kernel/sysctl.c 2004-03-31 23:29:10.738804528 -0800 @@ -736,6 +736,14 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_MAX_MAP_COUNT, + .procname = "max_map_count", + .data = &sysctl_max_map_count, + .maxlen = sizeof(sysctl_max_map_count), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { .ctl_name = 0 } }; diff -puN mm/mmap.c~max_map_count-sysctl mm/mmap.c --- 25/mm/mmap.c~max_map_count-sysctl 2004-03-31 23:29:10.731805592 -0800 +++ 25-akpm/mm/mmap.c 2004-03-31 23:29:10.739804376 -0800 @@ -54,10 +54,12 @@ pgprot_t protection_map[16] = { int sysctl_overcommit_memory = 0; /* default is heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ +int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; atomic_t vm_committed_space = ATOMIC_INIT(0); EXPORT_SYMBOL(sysctl_overcommit_memory); EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(sysctl_max_map_count); EXPORT_SYMBOL(vm_committed_space); /* @@ -516,7 +518,7 @@ unsigned long do_mmap_pgoff(struct file return -EINVAL; /* Too many mappings? 
*/ - if (mm->map_count > MAX_MAP_COUNT) + if (mm->map_count > sysctl_max_map_count) return -ENOMEM; /* Obtain the address to map to. we verify (or select) it and ensure @@ -1203,7 +1205,7 @@ int split_vma(struct mm_struct * mm, str struct vm_area_struct *new; struct address_space *mapping = NULL; - if (mm->map_count >= MAX_MAP_COUNT) + if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); @@ -1381,7 +1383,7 @@ unsigned long do_brk(unsigned long addr, > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; - if (mm->map_count > MAX_MAP_COUNT) + if (mm->map_count > sysctl_max_map_count) return -ENOMEM; if (security_vm_enough_memory(len >> PAGE_SHIFT)) _