diff -urN linux-2.4.18/Documentation/sysctl/vm.txt linux/Documentation/sysctl/vm.txt --- linux-2.4.18/Documentation/sysctl/vm.txt Wed Jul 10 15:22:02 2002 +++ linux/Documentation/sysctl/vm.txt Thu Jul 11 14:11:20 2002 @@ -166,7 +166,11 @@ programs that malloc() huge amounts of memory "just-in-case" and don't use much of it. -Look at: mm/mmap.c::vm_enough_memory() for more information. +Flag values of 2 - 4 introduce a new "strict overcommit" +policy that attempts to prevent any overcommit of memory. + +See Documentation/vm/overcommit-accounting and +mm/mmap.c::vm_enough_memory() for more information. ============================================================== diff -urN linux-2.4.18/Documentation/vm/overcommit-accounting linux/Documentation/vm/overcommit-accounting --- linux-2.4.18/Documentation/vm/overcommit-accounting Wed Dec 31 16:00:00 1969 +++ linux/Documentation/vm/overcommit-accounting Thu Jul 11 14:11:30 2002 @@ -0,0 +1,77 @@ +The Linux kernel supports four overcommit handling modes + +0 - Heuristic overcommit handling. Obvious overcommits of + address space are refused. Used for a typical system. It + ensures a seriously wild allocation fails while allowing + overcommit to reduce swap usage. This is the default. + +1 - No overcommit handling. Appropriate for some scientific + applications. + +2 - (NEW) swapless strict overcommit. The total address space + commit for the system is not permitted to exceed 90% of + free memory. This mode utilizes the new stricter accounting + but does not impose a very strict rule. It is possible that + the system could kill a process accessing pages in certain + cases. If mode 3 is too strict when no swap is present + this is the best you can do. + +3 - (NEW) strict overcommit. The total address space commit + for the system is not permitted to exceed swap + half ram. 
+ In almost all situations this means a process will not be + killed while accessing pages but only by malloc failures + that are reported back by the kernel mmap/brk code. + +4 - (NEW) paranoid overcommit. The total address space commit + for the system is not permitted to exceed swap. The machine + will never kill a process accessing pages it has mapped + except due to a bug (ie report it!). + +Gotchas +------- + +The C language stack growth does an implicit mremap. If you want absolute +guarantees and run close to the edge you MUST mmap your stack for the +largest size you think you will need. For typical stack usage it does +not matter much but it's a corner case if you really really care + +In modes 2 and 3 the MAP_NORESERVE flag is ignored. + + +How It Works +------------ + +The overcommit is based on the following rules + +For a file backed map + SHARED or READ only - 0 cost (the file is the map not swap) + + WRITABLE SHARED - size of mapping per instance + +For a direct map + SHARED or READ only - size of mapping + PRIVATE WRITEABLE - size of mapping per instance + +Additional accounting + Pages made writable copies by mmap + shmfs memory drawn from the same pool + +Status +------ + +o We account mmap memory mappings +o We account mprotect changes in commit +o We account mremap changes in size +o We account brk +o We account munmap +o We report the commit status in /proc +o Account and check on fork +o Review stack handling/building on exec +o SHMfs accounting +o Implement actual limit enforcement + +To Do +----- +o Account ptrace pages (this is hard) +o Account for shared anonymous mappings properly + - right now we account them per instance diff -urN linux-2.4.18/fs/exec.c linux/fs/exec.c --- linux-2.4.18/fs/exec.c Wed Jul 10 15:21:23 2002 +++ linux/fs/exec.c Thu Jul 11 14:11:20 2002 @@ -306,8 +306,13 @@ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!mpnt) - return -ENOMEM; - + return -ENOMEM; + + if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & 
(unsigned long) bprm->p)) >> PAGE_SHIFT,1)) { + kmem_cache_free(vm_area_cachep, mpnt); + return -ENOMEM; + } + down_write(¤t->mm->mmap_sem); { mpnt->vm_mm = current->mm; diff -urN linux-2.4.18/fs/proc/proc_misc.c linux/fs/proc/proc_misc.c --- linux-2.4.18/fs/proc/proc_misc.c Wed Jul 10 15:21:23 2002 +++ linux/fs/proc/proc_misc.c Thu Jul 11 14:11:20 2002 @@ -128,12 +128,14 @@ return proc_calc_metrics(page, start, off, count, eof, len); } +extern atomic_t vm_committed_space; + static int meminfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { struct sysinfo i; int len; - int pg_size ; + int pg_size, committed; /* * display in kilobytes. @@ -143,6 +145,7 @@ si_meminfo(&i); si_swapinfo(&i); pg_size = atomic_read(&page_cache_size) - i.bufferram ; + committed = atomic_read(&vm_committed_space); len = sprintf(page, " total: used: free: shared: buffers: cached:\n" "Mem: %8Lu %8Lu %8Lu %8Lu %8Lu %8Lu\n" @@ -170,7 +173,8 @@ "LowTotal: %8lu kB\n" "LowFree: %8lu kB\n" "SwapTotal: %8lu kB\n" - "SwapFree: %8lu kB\n", + "SwapFree: %8lu kB\n" + "Committed_AS: %8u kB\n", K(i.totalram), K(i.freeram), K(i.sharedram), @@ -184,7 +188,8 @@ K(i.totalram-i.totalhigh), K(i.freeram-i.freehigh), K(i.totalswap), - K(i.freeswap)); + K(i.freeswap), + K(committed)); return proc_calc_metrics(page, start, off, count, eof, len); #undef B diff -urN linux-2.4.18/include/linux/mm.h linux/include/linux/mm.h --- linux-2.4.18/include/linux/mm.h Wed Jul 10 15:21:25 2002 +++ linux/include/linux/mm.h Thu Jul 11 14:11:20 2002 @@ -102,8 +102,9 @@ #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ #define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ +#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ -#define VM_STACK_FLAGS 0x00000177 +#define VM_STACK_FLAGS (0x00000177 | VM_ACCOUNT) #define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) #define VM_ClearReadHint(v) 
(v)->vm_flags &= ~VM_READHINTMASK @@ -489,7 +490,7 @@ return ret; } -extern int do_munmap(struct mm_struct *, unsigned long, size_t); +extern int do_munmap(struct mm_struct *, unsigned long, size_t, int); extern unsigned long do_brk(unsigned long, unsigned long); @@ -557,33 +558,8 @@ return gfp_mask; } -/* vma is the first one with address < vma->vm_end, - * and even address < vma->vm_start. Have to extend vma. */ -static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) -{ - unsigned long grow; - - /* - * vma->vm_start/vm_end cannot change under us because the caller is required - * to hold the mmap_sem in write mode. We need to get the spinlock only - * before relocating the vma range ourself. - */ - address &= PAGE_MASK; - spin_lock(&vma->vm_mm->page_table_lock); - grow = (vma->vm_start - address) >> PAGE_SHIFT; - if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); - return -ENOMEM; - } - vma->vm_start = address; - vma->vm_pgoff -= grow; - vma->vm_mm->total_vm += grow; - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; - spin_unlock(&vma->vm_mm->page_table_lock); - return 0; -} +/* Do stack extension */ +extern int expand_stack(struct vm_area_struct * vma, unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. 
*/ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); diff -urN linux-2.4.18/include/linux/mman.h linux/include/linux/mman.h --- linux-2.4.18/include/linux/mman.h Wed Jul 10 15:21:25 2002 +++ linux/include/linux/mman.h Thu Jul 11 14:11:20 2002 @@ -6,4 +6,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 +extern int vm_enough_memory(long pages, int charge); +extern void vm_unacct_memory(long pages); +extern void vm_unacct_vma(struct vm_area_struct *vma); + #endif /* _LINUX_MMAN_H */ diff -urN linux-2.4.18/ipc/shm.c linux/ipc/shm.c --- linux-2.4.18/ipc/shm.c Wed Jul 10 15:21:35 2002 +++ linux/ipc/shm.c Thu Jul 11 14:11:20 2002 @@ -678,7 +678,7 @@ shmdnext = shmd->vm_next; if (shmd->vm_ops == &shm_vm_ops && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr) - do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start); + do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start, 1); } up_write(&mm->mmap_sem); return 0; diff -urN linux-2.4.18/kernel/fork.c linux/kernel/fork.c --- linux-2.4.18/kernel/fork.c Wed Jul 10 15:21:25 2002 +++ linux/kernel/fork.c Thu Jul 11 14:11:20 2002 @@ -131,6 +131,7 @@ { struct vm_area_struct * mpnt, *tmp, **pprev; int retval; + unsigned long charge = 0; flush_cache_mm(current->mm); mm->locked_vm = 0; @@ -159,6 +160,17 @@ retval = -ENOMEM; if(mpnt->vm_flags & VM_DONTCOPY) continue; + + /* + * FIXME: shared writable map accounting should be one off + */ + if (mpnt->vm_flags & VM_ACCOUNT) { + unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + if (!vm_enough_memory(len, 1)) + goto fail_nomem; + charge += len; + } + tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!tmp) goto fail_nomem; @@ -203,9 +215,12 @@ retval = 0; build_mmap_rb(mm); -fail_nomem: +out: flush_tlb_mm(current->mm); return retval; +fail_nomem: + vm_unacct_memory(charge); + goto out; } spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; diff -urN linux-2.4.18/mm/mmap.c linux/mm/mmap.c 
--- linux-2.4.18/mm/mmap.c Wed Jul 10 15:21:25 2002 +++ linux/mm/mmap.c Thu Jul 11 14:11:55 2002 @@ -1,8 +1,11 @@ /* - * linux/mm/mmap.c + * mm/mmap.c * * Written by obz. + * + * Address space accounting code */ + #include #include #include @@ -45,51 +48,97 @@ }; int sysctl_overcommit_memory; +atomic_t vm_committed_space = ATOMIC_INIT(0); -/* Check that a process has enough memory to allocate a - * new virtual mapping. +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 1 means there is enough memory for the allocation to + * succeed and 0 implies there is not. + * + * We currently support four overcommit policies, which are set via the + * overcommit_memory sysctl. See Documentation/vm/overcommit-acounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. */ -int vm_enough_memory(long pages) +int vm_enough_memory(long pages, int charge) { - /* Stupid algorithm to decide if we have enough memory: while - * simple, it hopefully works in most obvious cases.. Easy to - * fool it, but this should catch most mistakes. - */ - /* 23/11/98 NJC: Somewhat less stupid version of algorithm, - * which tries to do "TheRightThing". Instead of using half of - * (buffers+cache), use the minimum values. Allow an extra 2% - * of num_physpages for safety margin. - */ + unsigned long free, allowed; - unsigned long free; - - /* Sometimes we want to use more memory than we have. */ - if (sysctl_overcommit_memory) - return 1; - - /* The page cache contains buffer pages these days.. */ - free = atomic_read(&page_cache_size); - free += nr_free_pages(); - free += nr_swap_pages; + if (charge) + atomic_add(pages, &vm_committed_space); - /* - * This double-counts: the nrpages are both in the page-cache - * and in the swapper space. At the same time, this compensates - * for the swap-space over-allocation (ie "nr_swap_pages" being - * too small. 
+ /* + * Sometimes we want to use more memory than we have */ - free += swapper_space.nrpages; + if (sysctl_overcommit_memory == 1) + return 1; - /* - * The code below doesn't account for free space in the inode - * and dentry slab cache, slab cache fragmentation, inodes and - * dentries which will become freeable under VM load, etc. - * Lets just hope all these (complex) factors balance out... - */ - free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT; - free += (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT; + if (sysctl_overcommit_memory == 0) { + free = atomic_read(&page_cache_size); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * This double-counts: the nrpages are both in the + * page-cache and in the swapper space. At the same time, + * this compensates for the swap-space over-allocation + * (ie "nr_swap_pages" being too small). + */ + free += swapper_space.nrpages; + + /* + * The code below doesn't account for free space in the + * inode and dentry slab cache, slab cache fragmentation, + * inodes and dentries which will become freeable under + * VM load, etc. Lets just hope all these (complex) + * factors balance out... 
+ */ + free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> + PAGE_SHIFT; + free += (inodes_stat.nr_unused * sizeof(struct inode)) >> + PAGE_SHIFT; + + if (free > pages) + return 1; + if (charge) + atomic_sub(pages, &vm_committed_space); + return 0; + } - return free > pages; + if (sysctl_overcommit_memory == 2) { + /* + * FIXME: need to add arch hooks to get the bits we need + * without the higher overhead crap + */ + struct sysinfo i; + si_meminfo(&i); + allowed = i.totalram - (i.totalram / 20); + } else if (sysctl_overcommit_memory == 3) { + struct sysinfo i; + si_meminfo(&i); + allowed = total_swap_pages + (i.totalram >> 1); + } else /* sysctl_overcommit_memory == 4 */ + allowed = total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 1; + + if (charge) + atomic_sub(pages, &vm_committed_space); + + return 0; +} + +void inline vm_unacct_memory(long pages) +{ + atomic_sub(pages, &vm_committed_space); +} + +void vm_unacct_vma(struct vm_area_struct *vma) +{ + int len = vma->vm_end - vma->vm_start; + if (vma->vm_flags & VM_ACCOUNT) + vm_unacct_memory(len >> PAGE_SHIFT); } /* Remove one vm structure from the inode's i_mapping address space. */ @@ -160,7 +209,7 @@ /* Always allow shrinking brk. */ if (brk <= mm->brk) { - if (!do_munmap(mm, newbrk, oldbrk-newbrk)) + if (!do_munmap(mm, newbrk, oldbrk-newbrk, 1)) goto set_brk; goto out; } @@ -174,8 +223,11 @@ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) goto out; - /* Check if we have enough memory.. */ - if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) + /* + * Check if we have enough memory.. + * FIXME: this seems to be checked in do_brk ... + */ + if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT, 0)) goto out; /* Ok, looks good - let it rip. 
*/ @@ -389,8 +441,9 @@ return 0; } -unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long pgoff) +unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flags, unsigned long pgoff) { struct mm_struct * mm = current->mm; struct vm_area_struct * vma, * prev; @@ -398,16 +451,20 @@ int correct_wcount = 0; int error; rb_node_t ** rb_link, * rb_parent; + unsigned long charged = 0; if (file && (!file->f_op || !file->f_op->mmap)) return -ENODEV; - if ((len = PAGE_ALIGN(len)) == 0) + if (!len) return addr; if (len > TASK_SIZE) return -EINVAL; + /* This cannot be zero now */ + len = PAGE_ALIGN(len); + /* offset overflow? */ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EINVAL; @@ -482,7 +539,7 @@ munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); if (vma && vma->vm_start < addr + len) { - if (do_munmap(mm, addr, len)) + if (do_munmap(mm, addr, len, 1)) return -ENOMEM; goto munmap_back; } @@ -492,11 +549,17 @@ > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; + if (sysctl_overcommit_memory > 1) + vm_flags &= ~MAP_NORESERVE; + /* Private writable mapping? Check memory availability.. */ - if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory(len >> PAGE_SHIFT)) - return -ENOMEM; + if ((((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) || + (file == NULL)) && !(flags & MAP_NORESERVE)) { + charged = len >> PAGE_SHIFT; + if (!vm_enough_memory(charged, 1)) + return -ENOMEM; + vm_flags |= VM_ACCOUNT; + } /* Can we just expand an old anonymous mapping? */ if (!file && !(vm_flags & VM_SHARED) && rb_parent) @@ -508,8 +571,9 @@ * not unmapped, but the maps are removed from the list. 
*/ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + error = -ENOMEM; if (!vma) - return -ENOMEM; + goto unacct_error; vma->vm_mm = mm; vma->vm_start = addr; @@ -572,6 +636,9 @@ zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start); free_vma: kmem_cache_free(vm_area_cachep, vma); +unacct_error: + if (charged) + vm_unacct_memory(charged); return error; } @@ -714,6 +781,45 @@ return NULL; } +/* + * vma is the first one with address < vma->vm_end, + * and even address < vma->vm_start. Have to extend vma. + */ +int expand_stack(struct vm_area_struct * vma, unsigned long address) +{ + unsigned long grow; + + /* + * vma->vm_start/vm_end cannot change under us because the caller + * is required to hold the mmap_sem in write mode. We need to get + * the spinlock only before relocating the vma range ourself. + */ + address &= PAGE_MASK; + spin_lock(&vma->vm_mm->page_table_lock); + grow = (vma->vm_start - address) >> PAGE_SHIFT; + + /* Overcommit.. */ + if(!vm_enough_memory(grow, 1)) { + spin_unlock(&vma->vm_mm->page_table_lock); + return -ENOMEM; + } + + if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + current->rlim[RLIMIT_AS].rlim_cur) { + spin_unlock(&vma->vm_mm->page_table_lock); + vm_unacct_memory(grow); + return -ENOMEM; + } + vma->vm_start = address; + vma->vm_pgoff -= grow; + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; + spin_unlock(&vma->vm_mm->page_table_lock); + return 0; +} + struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr) { struct vm_area_struct * vma; @@ -761,7 +867,7 @@ */ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm, struct vm_area_struct *area, unsigned long addr, size_t len, - struct vm_area_struct *extra) + struct vm_area_struct *extra, int acct) { struct vm_area_struct *mpnt; unsigned long end = addr + len; @@ -776,10 +882,15 @@ area->vm_ops->close(area); if 
(area->vm_file) fput(area->vm_file); + if (acct) + vm_unacct_vma(area); kmem_cache_free(vm_area_cachep, area); return extra; } + if (acct && (area->vm_flags & VM_ACCOUNT)) + vm_unacct_memory(len >> PAGE_SHIFT); + /* Work out to one of the ends. */ if (end == area->vm_end) { /* @@ -894,7 +1005,7 @@ * work. This now handles partial unmappings. * Jeremy Fitzhardine */ -int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) +int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) { struct vm_area_struct *mpnt, *prev, **npp, *free, *extra; @@ -970,9 +1081,10 @@ zap_page_range(mm, st, size); /* - * Fix the mapping, and free the old area if it wasn't reused. + * Fix the mapping, and free the old area if it was not + * reused. */ - extra = unmap_fixup(mm, mpnt, st, size, extra); + extra = unmap_fixup(mm, mpnt, st, size, extra, acct); if (file) atomic_inc(&file->f_dentry->d_inode->i_writecount); } @@ -993,7 +1105,7 @@ struct mm_struct *mm = current->mm; down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); + ret = do_munmap(mm, addr, len, 1); up_write(&mm->mmap_sem); return ret; } @@ -1030,7 +1142,7 @@ munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); if (vma && vma->vm_start < addr + len) { - if (do_munmap(mm, addr, len)) + if (do_munmap(mm, addr, len, 1)) return -ENOMEM; goto munmap_back; } @@ -1043,11 +1155,12 @@ if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (!vm_enough_memory(len >> PAGE_SHIFT, 1)) return -ENOMEM; flags = calc_vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE) | mm->def_flags; + MAP_FIXED|MAP_PRIVATE) | + VM_ACCOUNT | mm->def_flags; flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; @@ -1059,8 +1172,10 @@ * create a vma struct for an anonymous mapping */ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) + if (!vma) { + vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; + } vma->vm_mm = mm; vma->vm_start = 
addr; @@ -1121,10 +1236,18 @@ unsigned long end = mpnt->vm_end; unsigned long size = end - start; + /* + * If the VMA has been charged for, account for its + * removal + */ + if (mpnt->vm_flags & VM_ACCOUNT) + vm_unacct_vma(mpnt); + if (mpnt->vm_ops) { if (mpnt->vm_ops->close) mpnt->vm_ops->close(mpnt); } + mm->map_count--; remove_shared_vm_struct(mpnt); zap_page_range(mm, start, size); diff -urN linux-2.4.18/mm/mprotect.c linux/mm/mprotect.c --- linux-2.4.18/mm/mprotect.c Wed Jul 10 15:21:25 2002 +++ linux/mm/mprotect.c Thu Jul 11 14:11:20 2002 @@ -1,7 +1,10 @@ /* - * linux/mm/mprotect.c + * mm/mprotect.c * * (C) Copyright 1994 Linus Torvalds + * + * Address space accounting code + * (C) Copyright 2002 Red Hat Inc, All Rights Reserved */ #include #include @@ -241,6 +244,7 @@ { pgprot_t newprot; int error; + unsigned long charged = 0; if (newflags == vma->vm_flags) { *pprev = vma; @@ -257,9 +261,18 @@ else error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot); - if (error) + if (error) { + if(newflags & PROT_WRITE) + vm_unacct_memory(charged); return error; + } + /* + * Delayed accounting for reduction of memory use - done last to + * avoid allocation races + */ + if (charged && !(newflags & PROT_WRITE)) + vm_unacct_memory(charged); change_protection(start, end, newprot); return 0; } diff -urN linux-2.4.18/mm/mremap.c linux/mm/mremap.c --- linux-2.4.18/mm/mremap.c Wed Jul 10 15:21:25 2002 +++ linux/mm/mremap.c Thu Jul 11 14:11:20 2002 @@ -1,7 +1,10 @@ /* - * linux/mm/remap.c + * mm/remap.c * * (C) Copyright 1996 Linus Torvalds + * + * Address space accounting code + * (C) Copyright 2002 Red Hat Inc, All Rights Reserved */ #include @@ -13,8 +16,6 @@ #include #include -extern int vm_enough_memory(long pages); - static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) { pgd_t * pgd; @@ -189,7 +190,11 @@ new_vma->vm_ops->open(new_vma); insert_vm_struct(current->mm, new_vma); } - do_munmap(current->mm, addr, old_len); + /* + * The 
old VMA has been accounted for, + * don't double account + */ + do_munmap(current->mm, addr, old_len, 0); current->mm->total_vm += new_len >> PAGE_SHIFT; if (new_vma->vm_flags & VM_LOCKED) { current->mm->locked_vm += new_len >> PAGE_SHIFT; @@ -204,6 +209,8 @@ return -ENOMEM; } +extern int sysctl_overcommit_memory; /* FIXME!! */ + /* * Expand (or shrink) an existing mapping, potentially moving it at the * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) @@ -217,6 +224,7 @@ { struct vm_area_struct *vma; unsigned long ret = -EINVAL; + unsigned long charged = 0; if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -246,16 +254,17 @@ if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - do_munmap(current->mm, new_addr, new_len); + do_munmap(current->mm, new_addr, new_len, 1); } /* * Always allow a shrinking remap: that just unmaps * the unnecessary pages.. + * do_munmap does all the needed commit accounting */ ret = addr; if (old_len >= new_len) { - do_munmap(current->mm, addr+new_len, old_len - new_len); + do_munmap(current->mm, addr+new_len, old_len - new_len, 1); if (!(flags & MREMAP_FIXED) || (new_addr == addr)) goto out; } @@ -285,11 +294,14 @@ if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) > current->rlim[RLIMIT_AS].rlim_cur) goto out; - /* Private writable mapping? Check memory availability.. */ - if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory((new_len - old_len) >> PAGE_SHIFT)) - goto out; + + if (sysctl_overcommit_memory > 1) + flags &= ~MAP_NORESERVE; + if (vma->vm_flags & VM_ACCOUNT) { + charged = (new_len - old_len) >> PAGE_SHIFT; + if (!vm_enough_memory(charged, 1)) + goto out_nc; + } /* old_len exactly to the end of the area.. * And we're not relocating the area. 
@@ -336,6 +348,9 @@ ret = move_vma(vma, addr, old_len, new_len, new_addr); } out: + if (ret & ~PAGE_MASK) + vm_unacct_memory(charged); +out_nc: return ret; } diff -urN linux-2.4.18/mm/shmem.c linux/mm/shmem.c --- linux-2.4.18/mm/shmem.c Wed Jul 10 15:21:25 2002 +++ linux/mm/shmem.c Thu Jul 11 14:11:20 2002 @@ -5,7 +5,8 @@ * 2000 Transmeta Corp. * 2000-2001 Christoph Rohland * 2000-2001 SAP AG - * + * 2002 Red Hat Inc. + * * This file is released under the GPL. */ @@ -21,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -330,10 +332,38 @@ up(&info->sem); } +static int shmem_notify_change(struct dentry * dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + if (attr->ia_valid & ATTR_SIZE) { + /* + * Account swap file usage based on new file size + */ + long change = (attr->ia_size>>PAGE_SHIFT) - (inode->i_size >> PAGE_SHIFT); + + if (attr->ia_size > inode->i_size) { + if (!vm_enough_memory(change,1)) + return -ENOMEM; + } else + vm_unacct_memory(-change); + } + + error = inode_change_ok(inode, attr); + if (!error) + error = inode_setattr(inode, attr); + + return error; +} + + static void shmem_delete_inode(struct inode * inode) { struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + vm_unacct_memory((inode->i_size) >> PAGE_SHIFT); + inode->i_size = 0; if (inode->i_op->truncate == shmem_truncate){ spin_lock (&shmem_ilock); @@ -751,6 +781,7 @@ unsigned long written; long status; int err; + loff_t maxpos; if ((ssize_t) count < 0) return -EINVAL; @@ -763,12 +794,12 @@ pos = *ppos; err = -EINVAL; if (pos < 0) - goto out; + goto out_nc; err = file->f_error; if (err) { file->f_error = 0; - goto out; + goto out_nc; } written = 0; @@ -776,6 +807,15 @@ if (file->f_flags & O_APPEND) pos = inode->i_size; + maxpos = inode->i_size; + if (pos + count > inode->i_size) { + maxpos = pos + count; + if (!vm_enough_memory((maxpos - inode->i_size) >> PAGE_SHIFT, 1)) { + err = -ENOMEM; + goto out_nc; + } + } + /* * Check 
whether we've reached the file size limit. */ @@ -865,6 +905,10 @@ err = written ? written : status; out: + /* Short writes give back address space */ + if (inode->i_size != maxpos) + vm_unacct_memory((maxpos - inode->i_size) >> PAGE_SHIFT); +out_nc: up(&inode->i_sem); return err; fail_write: @@ -1344,6 +1388,7 @@ static struct inode_operations shmem_inode_operations = { truncate: shmem_truncate, + setattr: shmem_notify_change, }; static struct file_operations shmem_dir_operations = { @@ -1442,17 +1487,16 @@ */ struct file *shmem_file_setup(char * name, loff_t size) { - int error; + int error = -ENOMEM; struct file *file; struct inode * inode; struct dentry *dentry, *root; struct qstr this; - int vm_enough_memory(long pages); if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT) return ERR_PTR(-EINVAL); - if (!vm_enough_memory((size) >> PAGE_CACHE_SHIFT)) + if (!vm_enough_memory((size) >> PAGE_CACHE_SHIFT, 1)) return ERR_PTR(-ENOMEM); this.name = name; @@ -1461,7 +1505,7 @@ root = shm_mnt->mnt_root; dentry = d_alloc(root, &this); if (!dentry) - return ERR_PTR(-ENOMEM); + goto put_memory; error = -ENFILE; file = get_empty_filp(); @@ -1487,6 +1531,8 @@ put_filp(file); put_dentry: dput (dentry); +put_memory: + vm_unacct_memory((size) >> PAGE_CACHE_SHIFT); return ERR_PTR(error); } /*