From: Andy Whitcroft Two problems: a) The memory overcommit code fails to take into account all the pages which are pinned by being reserved for the hugetlbpage pool b) We're performing overcommit accounting and checking on behalf of hugetlbpage vmas. The main thrust is to ensure that VM_ACCOUNT actually only gets set on vmas which are indeed accountable. With that ensured much of the rest comes out in the wash. It also removes the hugetlb memory for the overcommit_memory=2 case. --- 25-akpm/arch/i386/mm/hugetlbpage.c | 6 ++++++ 25-akpm/arch/ia64/mm/hugetlbpage.c | 6 ++++++ 25-akpm/arch/ppc64/mm/hugetlbpage.c | 6 ++++++ 25-akpm/arch/sh/mm/hugetlbpage.c | 6 ++++++ 25-akpm/arch/sparc64/mm/hugetlbpage.c | 6 ++++++ 25-akpm/include/linux/hugetlb.h | 5 +++++ 25-akpm/include/linux/mm.h | 3 +++ 25-akpm/mm/mmap.c | 7 ++++++- 25-akpm/mm/mprotect.c | 3 ++- 25-akpm/security/commoncap.c | 4 +++- 25-akpm/security/dummy.c | 4 +++- 25-akpm/security/selinux/hooks.c | 4 +++- 12 files changed, 55 insertions(+), 5 deletions(-) diff -puN arch/i386/mm/hugetlbpage.c~hugetlb-overcommit-fix arch/i386/mm/hugetlbpage.c --- 25/arch/i386/mm/hugetlbpage.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.161352024 -0800 +++ 25-akpm/arch/i386/mm/hugetlbpage.c 2004-03-26 02:03:30.177349592 -0800 @@ -548,6 +548,12 @@ int is_hugepage_mem_enough(size_t size) return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; } +/* Return the number pages of memory we physically have, in PAGE_SIZE units. */ +unsigned long hugetlb_total_pages(void) +{ + return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE); +} + /* * We cannot handle pagefaults against hugetlb pages at all. 
They cause * handle_mm_fault() to try to instantiate regular-sized pages in the diff -puN arch/ia64/mm/hugetlbpage.c~hugetlb-overcommit-fix arch/ia64/mm/hugetlbpage.c --- 25/arch/ia64/mm/hugetlbpage.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.163351720 -0800 +++ 25-akpm/arch/ia64/mm/hugetlbpage.c 2004-03-26 02:03:30.178349440 -0800 @@ -592,6 +592,12 @@ int is_hugepage_mem_enough(size_t size) return 1; } +/* Return the number pages of memory we physically have, in PAGE_SIZE units. */ +unsigned long hugetlb_total_pages(void) +{ + return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE); +} + static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int *unused) { BUG(); diff -puN arch/ppc64/mm/hugetlbpage.c~hugetlb-overcommit-fix arch/ppc64/mm/hugetlbpage.c --- 25/arch/ppc64/mm/hugetlbpage.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.164351568 -0800 +++ 25-akpm/arch/ppc64/mm/hugetlbpage.c 2004-03-26 02:03:30.179349288 -0800 @@ -912,6 +912,12 @@ int is_hugepage_mem_enough(size_t size) return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpage_free; } +/* Return the number pages of memory we physically have, in PAGE_SIZE units. */ +unsigned long hugetlb_total_pages(void) +{ + return htlbpage_total * (HPAGE_SIZE / PAGE_SIZE); +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the diff -puN arch/sparc64/mm/hugetlbpage.c~hugetlb-overcommit-fix arch/sparc64/mm/hugetlbpage.c --- 25/arch/sparc64/mm/hugetlbpage.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.166351264 -0800 +++ 25-akpm/arch/sparc64/mm/hugetlbpage.c 2004-03-26 02:03:30.180349136 -0800 @@ -497,6 +497,12 @@ int is_hugepage_mem_enough(size_t size) return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; } +/* Return the number pages of memory we physically have, in PAGE_SIZE units. 
*/ +unsigned long hugetlb_total_pages(void) +{ + return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE); +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the diff -puN include/linux/hugetlb.h~hugetlb-overcommit-fix include/linux/hugetlb.h --- 25/include/linux/hugetlb.h~hugetlb-overcommit-fix 2004-03-26 02:03:30.167351112 -0800 +++ 25-akpm/include/linux/hugetlb.h 2004-03-26 02:03:30.180349136 -0800 @@ -19,6 +19,7 @@ int hugetlb_prefault(struct address_spac void huge_page_release(struct page *); int hugetlb_report_meminfo(char *); int is_hugepage_mem_enough(size_t); +unsigned long hugetlb_total_pages(void); struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write); struct vm_area_struct *hugepage_vma(struct mm_struct *mm, @@ -48,6 +49,10 @@ static inline int is_vm_hugetlb_page(str { return 0; } +static inline unsigned long hugetlb_total_pages(void) +{ + return 0; +} #define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) #define follow_huge_addr(mm, vma, addr, write) 0 diff -puN include/linux/mm.h~hugetlb-overcommit-fix include/linux/mm.h --- 25/include/linux/mm.h~hugetlb-overcommit-fix 2004-03-26 02:03:30.168350960 -0800 +++ 25-akpm/include/linux/mm.h 2004-03-26 02:03:30.181348984 -0800 @@ -112,6 +112,9 @@ struct vm_area_struct { #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ +/* It makes sense to apply VM_ACCOUNT to this vma. 
*/ +#define VM_MAYACCT(vma) (!((vma)->vm_flags & VM_HUGETLB)) + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif diff -puN mm/mmap.c~hugetlb-overcommit-fix mm/mmap.c --- 25/mm/mmap.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.170350656 -0800 +++ 25-akpm/mm/mmap.c 2004-03-26 02:03:30.182348832 -0800 @@ -489,9 +489,13 @@ unsigned long do_mmap_pgoff(struct file int correct_wcount = 0; int error; struct rb_node ** rb_link, * rb_parent; + int accountable = 1; unsigned long charged = 0; if (file) { + if (is_file_hugepages(file)) + accountable = 0; + if (!file->f_op || !file->f_op->mmap) return -ENODEV; @@ -608,7 +612,8 @@ munmap_back: > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; - if (!(flags & MAP_NORESERVE) || sysctl_overcommit_memory > 1) { + if (accountable && (!(flags & MAP_NORESERVE) || + sysctl_overcommit_memory > 1)) { if (vm_flags & VM_SHARED) { /* Check memory availability in shmem_file_setup? */ vm_flags |= VM_ACCOUNT; diff -puN mm/mprotect.c~hugetlb-overcommit-fix mm/mprotect.c --- 25/mm/mprotect.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.171350504 -0800 +++ 25-akpm/mm/mprotect.c 2004-03-26 02:03:30.182348832 -0800 @@ -173,7 +173,8 @@ mprotect_fixup(struct vm_area_struct *vm * a MAP_NORESERVE private mapping to writable will now reserve. 
*/ if (newflags & VM_WRITE) { - if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { + if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED)) + && VM_MAYACCT(vma)) { charged = (end - start) >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) return -ENOMEM; diff -puN security/commoncap.c~hugetlb-overcommit-fix security/commoncap.c --- 25/security/commoncap.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.172350352 -0800 +++ 25-akpm/security/commoncap.c 2004-03-26 02:03:30.183348680 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include <linux/hugetlb.h> int cap_capable (struct task_struct *tsk, int cap) { @@ -358,7 +359,8 @@ int cap_vm_enough_memory(long pages) return -ENOMEM; } - allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed = (totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100; allowed += total_swap_pages; if (atomic_read(&vm_committed_space) < allowed) diff -puN security/dummy.c~hugetlb-overcommit-fix security/dummy.c --- 25/security/dummy.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.174350048 -0800 +++ 25-akpm/security/dummy.c 2004-03-26 02:03:30.184348528 -0800 @@ -25,6 +25,7 @@ #include #include #include +#include <linux/hugetlb.h> static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) { @@ -146,7 +147,8 @@ static int dummy_vm_enough_memory(long p return -ENOMEM; } - allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed = (totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100; allowed += total_swap_pages; if (atomic_read(&vm_committed_space) < allowed) diff -puN security/selinux/hooks.c~hugetlb-overcommit-fix security/selinux/hooks.c --- 25/security/selinux/hooks.c~hugetlb-overcommit-fix 2004-03-26 02:03:30.175349896 -0800 +++ 25-akpm/security/selinux/hooks.c 2004-03-26 02:03:30.186348224 -0800 @@ -59,6 +59,7 @@ #include /* for Unix socket types */ #include #include +#include <linux/hugetlb.h> #include "avc.h" #include "objsec.h" @@ -1545,7 +1546,8 @@ static int selinux_vm_enough_memory(long return 
-ENOMEM; } - allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed = (totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100; allowed += total_swap_pages; if (atomic_read(&vm_committed_space) < allowed) diff -puN arch/sh/mm/hugetlbpage.c~hugetlb-overcommit-fix arch/sh/mm/hugetlbpage.c --- 25/arch/sh/mm/hugetlbpage.c~hugetlb-overcommit-fix 2004-03-26 02:07:12.911488856 -0800 +++ 25-akpm/arch/sh/mm/hugetlbpage.c 2004-03-26 02:07:49.756887504 -0800 @@ -501,6 +501,12 @@ int is_hugepage_mem_enough(size_t size) return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; } +/* Return the number pages of memory we physically have, in PAGE_SIZE units. */ +unsigned long hugetlb_total_pages(void) +{ + return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE); +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the _