From: Stephen Smalley This patch against 2.5.73 replaces vm_enough_memory with a security hook per Alan Cox's suggestion so that security modules can completely replace the logic if desired. Note that the patch changes the interface to follow the convention of the other security hooks, i.e. return 0 if ok or -errno on failure (-ENOMEM in this case) rather than returning a boolean. It also exports various variables and functions required for the vm_enough_memory logic. fs/exec.c | 2 - include/linux/mman.h | 3 + include/linux/security.h | 16 ++++++++++ include/linux/slab.h | 2 + kernel/fork.c | 2 - mm/mmap.c | 71 +++++------------------------------------------ mm/mprotect.c | 2 - mm/mremap.c | 3 + mm/page_alloc.c | 5 +++ mm/shmem.c | 9 +++-- mm/slab.c | 2 + mm/swap.c | 2 + mm/swapfile.c | 6 +++ security/capability.c | 65 +++++++++++++++++++++++++++++++++++++++++++ security/dummy.c | 52 ++++++++++++++++++++++++++++++++++ 15 files changed, 169 insertions(+), 73 deletions(-) diff -puN fs/exec.c~security_vm_enough_memory fs/exec.c --- 25/fs/exec.c~security_vm_enough_memory 2003-06-27 14:17:59.000000000 -0700 +++ 25-akpm/fs/exec.c 2003-06-27 14:18:00.000000000 -0700 @@ -392,7 +392,7 @@ int setup_arg_pages(struct linux_binprm if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -puN include/linux/mman.h~security_vm_enough_memory include/linux/mman.h --- 25/include/linux/mman.h~security_vm_enough_memory 2003-06-27 14:17:59.000000000 -0700 +++ 25-akpm/include/linux/mman.h 2003-06-27 14:18:00.000000000 -0700 @@ -9,7 +9,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 -extern int vm_enough_memory(long pages); +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; extern atomic_t vm_committed_space; #ifdef CONFIG_SMP diff -puN 
include/linux/security.h~security_vm_enough_memory include/linux/security.h --- 25/include/linux/security.h~security_vm_enough_memory 2003-06-27 14:17:59.000000000 -0700 +++ 25-akpm/include/linux/security.h 2003-06-27 14:18:00.000000000 -0700 @@ -49,6 +49,7 @@ extern int cap_bprm_secureexec(struct li extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); +extern int cap_vm_enough_memory (long pages); static inline int cap_netlink_send (struct sk_buff *skb) { @@ -958,6 +959,10 @@ struct swap_info_struct; * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @pages contains the number of pages. + * Return 0 if permission is granted. * * @register_security: * allow module stacking. 
@@ -989,6 +994,7 @@ struct security_operations { int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct file * f); int (*syslog) (int type); + int (*vm_enough_memory) (long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1238,6 +1244,11 @@ static inline int security_syslog(int ty return security_ops->syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return security_ops->vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return security_ops->bprm_alloc_security (bprm); @@ -1898,6 +1909,11 @@ static inline int security_syslog(int ty return cap_syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return cap_vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return 0; diff -puN include/linux/slab.h~security_vm_enough_memory include/linux/slab.h --- 25/include/linux/slab.h~security_vm_enough_memory 2003-06-27 14:17:59.000000000 -0700 +++ 25-akpm/include/linux/slab.h 2003-06-27 14:18:00.000000000 -0700 @@ -116,6 +116,8 @@ extern kmem_cache_t *bio_cachep; void ptrinfo(unsigned long addr); +extern atomic_t slab_reclaim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff -puN kernel/fork.c~security_vm_enough_memory kernel/fork.c --- 25/kernel/fork.c~security_vm_enough_memory 2003-06-27 14:17:59.000000000 -0700 +++ 25-akpm/kernel/fork.c 2003-06-27 14:18:00.000000000 -0700 @@ -296,7 +296,7 @@ static inline int dup_mmap(struct mm_str continue; if (mpnt->vm_flags & VM_ACCOUNT) { unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - if (!vm_enough_memory(len)) + if (security_vm_enough_memory(len)) goto fail_nomem; charge += len; } diff -puN mm/mmap.c~security_vm_enough_memory mm/mmap.c --- 25/mm/mmap.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/mmap.c 
2003-06-27 14:18:00.000000000 -0700 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,65 +54,9 @@ int sysctl_overcommit_memory = 0; /* def int sysctl_overcommit_ratio = 50; /* default is 50% */ atomic_t vm_committed_space = ATOMIC_INIT(0); -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 1 means there is enough memory for the allocation to - * succeed and 0 implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. - */ -extern atomic_t slab_reclaim_pages; -int vm_enough_memory(long pages) -{ - unsigned long free, allowed; - - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == 1) - return 1; - - if (sysctl_overcommit_memory == 0) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. 
The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 1; - vm_unacct_memory(pages); - return 0; - } - - allowed = totalram_pages * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 1; - - vm_unacct_memory(pages); - - return 0; -} +EXPORT_SYMBOL(sysctl_overcommit_memory); +EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem @@ -646,7 +591,7 @@ munmap_back: * Private writable mapping: check memory availability */ charged = len >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } @@ -965,7 +910,7 @@ int expand_stack(struct vm_area_struct * grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1019,7 +964,7 @@ int expand_stack(struct vm_area_struct * grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. 
*/ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1391,7 +1336,7 @@ unsigned long do_brk(unsigned long addr, if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; diff -puN mm/mprotect.c~security_vm_enough_memory mm/mprotect.c --- 25/mm/mprotect.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/mprotect.c 2003-06-27 14:18:00.000000000 -0700 @@ -175,7 +175,7 @@ mprotect_fixup(struct vm_area_struct *vm if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } diff -puN mm/mremap.c~security_vm_enough_memory mm/mremap.c --- 25/mm/mremap.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/mremap.c 2003-06-27 14:18:00.000000000 -0700 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -385,7 +386,7 @@ unsigned long do_mremap(unsigned long ad if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) goto out_nc; } diff -puN mm/page_alloc.c~security_vm_enough_memory mm/page_alloc.c --- 25/mm/page_alloc.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/page_alloc.c 2003-06-27 14:18:00.000000000 -0700 @@ -43,6 +43,9 @@ int nr_swap_pages; int numnodes = 1; int sysctl_lower_zone_protection = 0; +EXPORT_SYMBOL(totalram_pages); +EXPORT_SYMBOL(nr_swap_pages); + /* * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags @@ -733,6 +736,7 @@ unsigned int nr_free_pages(void) return 
sum; } +EXPORT_SYMBOL(nr_free_pages); unsigned int nr_used_zone_pages(void) { @@ -825,6 +829,7 @@ DEFINE_PER_CPU(struct page_state, page_s EXPORT_PER_CPU_SYMBOL(page_states); atomic_t nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); #ifdef CONFIG_SMP DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif diff -puN mm/shmem.c~security_vm_enough_memory mm/shmem.c --- 25/mm/shmem.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/shmem.c 2003-06-27 14:18:00.000000000 -0700 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -507,7 +508,7 @@ static int shmem_notify_change(struct de */ change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size); if (change > 0) { - if (!vm_enough_memory(change)) + if (security_vm_enough_memory(change)) return -ENOMEM; } else if (attr->ia_size < inode->i_size) { vm_unacct_memory(-change); @@ -1139,7 +1140,7 @@ shmem_file_write(struct file *file, cons maxpos = inode->i_size; if (maxpos < pos + count) { maxpos = pos + count; - if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { + if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { err = -ENOMEM; goto out; } @@ -1495,7 +1496,7 @@ static int shmem_symlink(struct inode *d memcpy(info, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { - if (!vm_enough_memory(VM_ACCT(1))) { + if (security_vm_enough_memory(VM_ACCT(1))) { iput(inode); return -ENOMEM; } @@ -1889,7 +1890,7 @@ struct file *shmem_file_setup(char *name if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); - if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size))) + if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); error = -ENOMEM; diff -puN mm/slab.c~security_vm_enough_memory mm/slab.c --- 25/mm/slab.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/slab.c 2003-06-27 14:18:00.000000000 -0700 @@ -90,6 +90,7 @@ #include #include #include 
+#include #include #include @@ -462,6 +463,7 @@ struct list_head cache_chain; * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ atomic_t slab_reclaim_pages; +EXPORT_SYMBOL(slab_reclaim_pages); /* * chicken and egg problem: delay the per-cpu array allocation diff -puN mm/swap.c~security_vm_enough_memory mm/swap.c --- 25/mm/swap.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/swap.c 2003-06-27 14:18:00.000000000 -0700 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* for try_to_release_page() */ #include @@ -371,6 +372,7 @@ void vm_acct_memory(long pages) } preempt_enable(); } +EXPORT_SYMBOL(vm_acct_memory); #endif diff -puN mm/swapfile.c~security_vm_enough_memory mm/swapfile.c --- 25/mm/swapfile.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/mm/swapfile.c 2003-06-27 14:18:00.000000000 -0700 @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -30,6 +32,8 @@ unsigned int nr_swapfiles; int total_swap_pages; static int swap_overflow; +EXPORT_SYMBOL(total_swap_pages); + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1042,7 +1046,7 @@ asmlinkage long sys_swapoff(const char _ swap_list_unlock(); goto out_dput; } - if (vm_enough_memory(p->pages)) + if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; diff -puN security/capability.c~security_vm_enough_memory security/capability.c --- 25/security/capability.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/security/capability.c 2003-06-27 14:18:00.000000000 -0700 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -275,6 +278,65 @@ int cap_syslog (int type) return 0; } +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 
0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. + */ +int cap_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!capable(CAP_SYS_ADMIN)) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); @@ -286,6 +348,7 @@ EXPORT_SYMBOL(cap_bprm_secureexec); EXPORT_SYMBOL(cap_task_post_setuid); EXPORT_SYMBOL(cap_task_reparent_to_init); EXPORT_SYMBOL(cap_syslog); +EXPORT_SYMBOL(cap_vm_enough_memory); #ifdef CONFIG_SECURITY @@ -307,6 +370,8 @@ static struct security_operations capabi .task_reparent_to_init = cap_task_reparent_to_init, .syslog = cap_syslog, + + .vm_enough_memory = cap_vm_enough_memory, }; #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE) diff -puN security/dummy.c~security_vm_enough_memory security/dummy.c --- 
25/security/dummy.c~security_vm_enough_memory 2003-06-27 14:18:00.000000000 -0700 +++ 25-akpm/security/dummy.c 2003-06-27 14:18:00.000000000 -0700 @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -97,6 +100,54 @@ static int dummy_syslog (int type) return 0; } +static int dummy_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (current->euid) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + static int dummy_bprm_alloc_security (struct linux_binprm *bprm) { return 0; @@ -793,6 +844,7 @@ void security_fixup_ops (struct security set_to_dummy_if_null(ops, quota_on); set_to_dummy_if_null(ops, sysctl); set_to_dummy_if_null(ops, syslog); + set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); set_to_dummy_if_null(ops, bprm_compute_creds); _