From: "Serge E. Hallyn" The attached patch introduces a __vm_enough_memory function in mm/mmap.c which is used by cap_vm_enough_memory, dummy_vm_enough_memory, and selinux_vm_enough_memory. This has been discussed on the lsm mailing list. Signed-off-by: Serge Hallyn Signed-off-by: Andrew Morton --- 25-akpm/include/linux/mm.h | 1 25-akpm/mm/mmap.c | 88 +++++++++++++++++++++++++++++++++++++++ 25-akpm/security/commoncap.c | 81 +---------------------------------- 25-akpm/security/dummy.c | 64 +--------------------------- 25-akpm/security/selinux/hooks.c | 70 ++++++------------------------- 5 files changed, 112 insertions(+), 192 deletions(-) diff -puN include/linux/mm.h~merge-_vm_enough_memorys-into-a-common-helper include/linux/mm.h --- 25/include/linux/mm.h~merge-_vm_enough_memorys-into-a-common-helper 2005-01-05 15:58:16.422418944 -0800 +++ 25-akpm/include/linux/mm.h 2005-01-05 15:58:16.432417424 -0800 @@ -706,6 +706,7 @@ static inline void vma_nonlinear_insert( } /* mmap.c */ +extern int __vm_enough_memory(long pages, int cap_sys_admin); extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, diff -puN mm/mmap.c~merge-_vm_enough_memorys-into-a-common-helper mm/mmap.c --- 25/mm/mmap.c~merge-_vm_enough_memorys-into-a-common-helper 2005-01-05 15:58:16.423418792 -0800 +++ 25-akpm/mm/mmap.c 2005-01-05 15:58:16.434417120 -0800 @@ -61,10 +61,98 @@ int sysctl_overcommit_ratio = 50; /* def int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; atomic_t vm_committed_space = ATOMIC_INIT(0); +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. 
See Documentation/vm/overcommit-accounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. + * + * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. + * + * Note this is a helper function intended to be used by LSMs which + * wish to use this logic. + */ +int __vm_enough_memory(long pages, int cap_sys_admin) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) + return 0; + + if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { + unsigned long n; + + free = get_page_cache_size(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!cap_sys_admin) + free -= free / 32; + + if (free > pages) + return 0; + + /* + * nr_free_pages() is very expensive on large systems, + * only call if we're about to fail. 
+ */ + n = nr_free_pages(); + if (!cap_sys_admin) + n -= n / 32; + free += n; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = (totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100; + /* + * Leave the last 3% for root + */ + if (!cap_sys_admin) + allowed -= allowed / 32; + allowed += total_swap_pages; + + /* Don't let a single process grow too big: + leave 3% of the size of this process for other processes */ + allowed -= current->mm->total_vm / 32; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(sysctl_overcommit_memory); EXPORT_SYMBOL(sysctl_overcommit_ratio); EXPORT_SYMBOL(sysctl_max_map_count); EXPORT_SYMBOL(vm_committed_space); +EXPORT_SYMBOL(__vm_enough_memory); /* * Requires inode->i_mapping->i_mmap_lock diff -puN security/commoncap.c~merge-_vm_enough_memorys-into-a-common-helper security/commoncap.c --- 25/security/commoncap.c~merge-_vm_enough_memorys-into-a-common-helper 2005-01-05 15:58:16.425418488 -0800 +++ 25-akpm/security/commoncap.c 2005-01-05 15:58:16.435416968 -0800 @@ -316,86 +316,13 @@ int cap_syslog (int type) return 0; } -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 0 means there is enough memory for the allocation to - * succeed and -ENOMEM implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. 
- */ int cap_vm_enough_memory(long pages) { - unsigned long free, allowed; + int cap_sys_admin = 0; - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) - return 0; - - if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - unsigned long n; - - free = get_page_cache_size(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 0; - - /* - * nr_free_pages() is very expensive on large systems, - * only call if we're about to fail. - */ - n = nr_free_pages(); - if (!capable(CAP_SYS_ADMIN)) - n -= n / 32; - free += n; - - if (free > pages) - return 0; - vm_unacct_memory(pages); - return -ENOMEM; - } - - allowed = (totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100; - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - allowed -= allowed / 32; - allowed += total_swap_pages; - - /* Don't let a single process grow too big: - leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; - - if (atomic_read(&vm_committed_space) < allowed) - return 0; - - vm_unacct_memory(pages); - - return -ENOMEM; + if (cap_capable(current, CAP_SYS_ADMIN) == 0) + cap_sys_admin = 1; + return __vm_enough_memory(pages, cap_sys_admin); } EXPORT_SYMBOL(cap_capable); diff -puN security/dummy.c~merge-_vm_enough_memorys-into-a-common-helper security/dummy.c --- 25/security/dummy.c~merge-_vm_enough_memorys-into-a-common-helper 2005-01-05 15:58:16.426418336 -0800 +++ 25-akpm/security/dummy.c 2005-01-05 15:58:16.436416816 -0800 @@ -108,69 +108,13 @@ static int dummy_settime(struct 
timespec return 0; } -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 0 means there is enough memory for the allocation to - * succeed and -ENOMEM implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting - */ static int dummy_vm_enough_memory(long pages) { - unsigned long free, allowed; + int cap_sys_admin = 0; - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) - return 0; - - if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (current->euid) - free -= free / 32; - - if (free > pages) - return 0; - vm_unacct_memory(pages); - return -ENOMEM; - } - - allowed = (totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - /* Leave the last 3% for root */ - if (current->euid) - allowed -= allowed / 32; - - /* Don't let a single process grow too big: - leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; - - if (atomic_read(&vm_committed_space) < allowed) - return 0; - - vm_unacct_memory(pages); - - return -ENOMEM; + if (dummy_capable(current, CAP_SYS_ADMIN) == 0) + cap_sys_admin = 1; + return __vm_enough_memory(pages, cap_sys_admin); } static int dummy_bprm_alloc_security (struct linux_binprm *bprm) diff -puN security/selinux/hooks.c~merge-_vm_enough_memorys-into-a-common-helper security/selinux/hooks.c --- 
25/security/selinux/hooks.c~merge-_vm_enough_memorys-into-a-common-helper 2005-01-05 15:58:16.428418032 -0800 +++ 25-akpm/security/selinux/hooks.c 2005-01-05 15:58:16.440416208 -0800 @@ -1515,69 +1515,29 @@ static int selinux_syslog(int type) * mapping. 0 means there is enough memory for the allocation to * succeed and -ENOMEM implies there is not. * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting + * Note that secondary_ops->capable and task_has_perm_noaudit return 0 + * if the capability is granted, but __vm_enough_memory requires 1 if + * the capability is granted. * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. + * Do not audit the selinux permission check, as this is applied to all + * processes that allocate mappings. */ static int selinux_vm_enough_memory(long pages) { - unsigned long free, allowed; - int rc; + int rc, cap_sys_admin = 0; struct task_security_struct *tsec = current->security; - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) - return 0; - - if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for privileged processes. - * Don't audit the check, as it is applied to all processes - * that allocate mappings. 
- */ - rc = secondary_ops->capable(current, CAP_SYS_ADMIN); - if (!rc) { - rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, - SECCLASS_CAPABILITY, - CAP_TO_MASK(CAP_SYS_ADMIN), NULL); - } - if (rc) - free -= free / 32; - - if (free > pages) - return 0; - vm_unacct_memory(pages); - return -ENOMEM; - } - - allowed = (totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 0; + rc = secondary_ops->capable(current, CAP_SYS_ADMIN); + if (rc == 0) + rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, + SECCLASS_CAPABILITY, + CAP_TO_MASK(CAP_SYS_ADMIN), + NULL); - vm_unacct_memory(pages); + if (rc == 0) + cap_sys_admin = 1; - return -ENOMEM; + return __vm_enough_memory(pages, cap_sys_admin); } /* binprm security operations */ _