From: Andi Kleen NUMA API adds a policy to each VMA. During VMA creation, merging and splitting these policies must be handled properly. This patch adds the calls to do this. It is a nop when CONFIG_NUMA is not defined. --- 25-akpm/arch/ia64/ia32/binfmt_elf32.c | 2 ++ 25-akpm/arch/ia64/kernel/perfmon.c | 1 + 25-akpm/arch/ia64/mm/init.c | 2 ++ 25-akpm/arch/m68k/atari/stram.c | 2 +- 25-akpm/arch/s390/kernel/compat_exec.c | 1 + 25-akpm/arch/x86_64/ia32/ia32_binfmt.c | 1 + 25-akpm/fs/exec.c | 1 + 25-akpm/kernel/exit.c | 1 + 25-akpm/kernel/fork.c | 18 +++++++++++++++++- 25-akpm/mm/mmap.c | 31 ++++++++++++++++++++++++++----- 25-akpm/mm/mprotect.c | 5 +++++ 11 files changed, 58 insertions(+), 7 deletions(-) diff -puN arch/ia64/ia32/binfmt_elf32.c~numa-api-vma-policy-hooks arch/ia64/ia32/binfmt_elf32.c --- 25/arch/ia64/ia32/binfmt_elf32.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.703390120 -0700 +++ 25-akpm/arch/ia64/ia32/binfmt_elf32.c 2004-04-10 01:35:05.720387536 -0700 @@ -104,6 +104,7 @@ ia64_elf32_init (struct pt_regs *regs) vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); down_write(&current->mm->mmap_sem); { insert_vm_struct(current->mm, vma); @@ -190,6 +191,7 @@ ia32_setup_arg_pages (struct linux_binpr mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; mpnt->vm_private_data = 0; + mpol_set_vma_default(mpnt); insert_vm_struct(current->mm, mpnt); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; } diff -puN arch/ia64/kernel/perfmon.c~numa-api-vma-policy-hooks arch/ia64/kernel/perfmon.c --- 25/arch/ia64/kernel/perfmon.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.705389816 -0700 +++ 25-akpm/arch/ia64/kernel/perfmon.c 2004-04-10 01:35:05.724386928 -0700 @@ -2273,6 +2273,7 @@ pfm_smpl_buffer_alloc(struct task_struct vma->vm_ops = &pfm_vm_ops; vma->vm_pgoff = 0; vma->vm_file = NULL; + mpol_set_vma_default(vma); vma->vm_private_data = ctx; /* information needed by the pfm_vm_close() function */ /* diff -puN 
arch/ia64/mm/init.c~numa-api-vma-policy-hooks arch/ia64/mm/init.c --- 25/arch/ia64/mm/init.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.706389664 -0700 +++ 25-akpm/arch/ia64/mm/init.c 2004-04-10 01:35:05.725386776 -0700 @@ -131,6 +131,7 @@ ia64_init_addr_space (void) vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); insert_vm_struct(current->mm, vma); } @@ -143,6 +144,7 @@ ia64_init_addr_space (void) vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + mpol_set_vma_default(vma); insert_vm_struct(current->mm, vma); } } diff -puN arch/m68k/atari/stram.c~numa-api-vma-policy-hooks arch/m68k/atari/stram.c --- 25/arch/m68k/atari/stram.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.707389512 -0700 +++ 25-akpm/arch/m68k/atari/stram.c 2004-04-10 01:35:05.726386624 -0700 @@ -752,7 +752,7 @@ static int unswap_by_read(unsigned short /* Get a page for the entry, using the existing swap cache page if there is one. Otherwise, get a clean page and read the swap into it. 
*/ - page = read_swap_cache_async(entry); + page = read_swap_cache_async(entry, NULL, 0); if (!page) { swap_free(entry); return -ENOMEM; diff -puN arch/s390/kernel/compat_exec.c~numa-api-vma-policy-hooks arch/s390/kernel/compat_exec.c --- 25/arch/s390/kernel/compat_exec.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.709389208 -0700 +++ 25-akpm/arch/s390/kernel/compat_exec.c 2004-04-10 01:35:05.726386624 -0700 @@ -72,6 +72,7 @@ int setup_arg_pages32(struct linux_binpr mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); diff -puN arch/x86_64/ia32/ia32_binfmt.c~numa-api-vma-policy-hooks arch/x86_64/ia32/ia32_binfmt.c --- 25/arch/x86_64/ia32/ia32_binfmt.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.710389056 -0700 +++ 25-akpm/arch/x86_64/ia32/ia32_binfmt.c 2004-04-10 01:35:05.727386472 -0700 @@ -365,6 +365,7 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); diff -puN fs/exec.c~numa-api-vma-policy-hooks fs/exec.c --- 25/fs/exec.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.711388904 -0700 +++ 25-akpm/fs/exec.c 2004-04-10 01:35:05.728386320 -0700 @@ -438,6 +438,7 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); diff -puN kernel/exit.c~numa-api-vma-policy-hooks kernel/exit.c --- 25/kernel/exit.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.713388600 -0700 +++ 25-akpm/kernel/exit.c 2004-04-10 01:35:05.729386168 -0700 @@ -777,6 +777,7 @@ asmlinkage NORET_TYPE void do_exit(long __exit_fs(tsk); exit_namespace(tsk); exit_thread(); + mpol_free(tsk->mempolicy); if 
(tsk->signal->leader) disassociate_ctty(1); diff -puN kernel/fork.c~numa-api-vma-policy-hooks kernel/fork.c --- 25/kernel/fork.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.714388448 -0700 +++ 25-akpm/kernel/fork.c 2004-04-10 01:35:05.730386016 -0700 @@ -272,6 +272,7 @@ static inline int dup_mmap(struct mm_str struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge = 0; + struct mempolicy *pol; down_write(&oldmm->mmap_sem); flush_cache_mm(current->mm); @@ -313,6 +314,11 @@ static inline int dup_mmap(struct mm_str if (!tmp) goto fail_nomem; *tmp = *mpnt; + pol = mpol_copy(vma_policy(mpnt)); + retval = PTR_ERR(pol); + if (IS_ERR(pol)) + goto fail_nomem_policy; + vma_set_policy(tmp, pol); tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; tmp->vm_next = NULL; @@ -359,6 +365,8 @@ out: flush_tlb_mm(current->mm); up_write(&oldmm->mmap_sem); return retval; +fail_nomem_policy: + kmem_cache_free(vm_area_cachep, tmp); fail_nomem: retval = -ENOMEM; fail: @@ -953,10 +961,16 @@ struct task_struct *copy_process(unsigne p->security = NULL; p->io_context = NULL; p->audit_context = NULL; + p->mempolicy = mpol_copy(p->mempolicy); + if (IS_ERR(p->mempolicy)) { + retval = PTR_ERR(p->mempolicy); + p->mempolicy = NULL; + goto bad_fork_cleanup; + } retval = -ENOMEM; if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup; + goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ @@ -1102,6 +1116,8 @@ bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); +bad_fork_cleanup_policy: + mpol_free(p->mempolicy); bad_fork_cleanup: if (p->pid > 0) free_pidmap(p->pid); diff -puN mm/mmap.c~numa-api-vma-policy-hooks mm/mmap.c --- 25/mm/mmap.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.716388144 -0700 +++ 25-akpm/mm/mmap.c 2004-04-10 01:35:05.732385712 -0700 @@ -389,7 +389,8 @@ static struct vm_area_struct *vma_merge( struct vm_area_struct *prev, struct rb_node 
*rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags, - struct file *file, unsigned long pgoff) + struct file *file, unsigned long pgoff, + struct mempolicy *policy) { spinlock_t *lock = &mm->page_table_lock; struct inode *inode = file ? file->f_dentry->d_inode : NULL; @@ -413,6 +414,7 @@ static struct vm_area_struct *vma_merge( * Can it merge with the predecessor? */ if (prev->vm_end == addr && + mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, file, pgoff)) { struct vm_area_struct *next; int need_up = 0; @@ -430,6 +432,7 @@ static struct vm_area_struct *vma_merge( */ next = prev->vm_next; if (next && prev->vm_end == next->vm_start && + vma_mpol_equal(prev, next) && can_vma_merge_before(next, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) { prev->vm_end = next->vm_end; @@ -442,6 +445,7 @@ static struct vm_area_struct *vma_merge( fput(file); mm->map_count--; + mpol_free(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); return prev; } @@ -457,6 +461,8 @@ static struct vm_area_struct *vma_merge( prev = prev->vm_next; if (prev) { merge_next: + if (!mpol_equal(policy, vma_policy(prev))) + return 0; if (!can_vma_merge_before(prev, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) return NULL; @@ -633,7 +639,7 @@ munmap_back: /* Can we just expand an old anonymous mapping? 
*/ if (!file && !(vm_flags & VM_SHARED) && rb_parent) if (vma_merge(mm, prev, rb_parent, addr, addr + len, - vm_flags, NULL, 0)) + vm_flags, NULL, pgoff, NULL)) goto out; /* @@ -656,6 +662,7 @@ munmap_back: vma->vm_file = NULL; vma->vm_private_data = NULL; vma->vm_next = NULL; + mpol_set_vma_default(vma); INIT_LIST_HEAD(&vma->shared); if (file) { @@ -695,7 +702,9 @@ munmap_back: addr = vma->vm_start; if (!file || !rb_parent || !vma_merge(mm, prev, rb_parent, addr, - addr + len, vma->vm_flags, file, pgoff)) { + vma->vm_end, + vma->vm_flags, file, pgoff, + vma_policy(vma))) { vma_link(mm, vma, prev, rb_link, rb_parent); if (correct_wcount) atomic_inc(&inode->i_writecount); @@ -705,6 +714,7 @@ munmap_back: atomic_inc(&inode->i_writecount); fput(file); } + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); } out: @@ -1120,6 +1130,7 @@ static void unmap_vma(struct mm_struct * remove_shared_vm_struct(area); + mpol_free(vma_policy(area)); if (area->vm_ops && area->vm_ops->close) area->vm_ops->close(area); if (area->vm_file) @@ -1202,6 +1213,7 @@ detach_vmas_to_be_unmapped(struct mm_str int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below) { + struct mempolicy *pol; struct vm_area_struct *new; struct address_space *mapping = NULL; @@ -1224,6 +1236,13 @@ int split_vma(struct mm_struct * mm, str new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } + pol = mpol_copy(vma_policy(vma)); + if (IS_ERR(pol)) { + kmem_cache_free(vm_area_cachep, new); + return PTR_ERR(pol); + } + vma_set_policy(new, pol); + if (new->vm_file) get_file(new->vm_file); @@ -1393,7 +1412,7 @@ unsigned long do_brk(unsigned long addr, /* Can we just expand an old anonymous mapping? 
*/ if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, - flags, NULL, 0)) + flags, NULL, 0, NULL)) goto out; /* @@ -1414,6 +1433,7 @@ unsigned long do_brk(unsigned long addr, vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); INIT_LIST_HEAD(&vma->shared); vma_link(mm, vma, prev, rb_link, rb_parent); @@ -1474,6 +1494,7 @@ void exit_mmap(struct mm_struct *mm) } if (vma->vm_file) fput(vma->vm_file); + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); vma = next; } @@ -1507,7 +1528,7 @@ struct vm_area_struct *copy_vma(struct v find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len, - vma->vm_flags, vma->vm_file, pgoff); + vma->vm_flags, vma->vm_file, pgoff, vma->vm_policy); if (!new_vma) { new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (new_vma) { diff -puN mm/mprotect.c~numa-api-vma-policy-hooks mm/mprotect.c --- 25/mm/mprotect.c~numa-api-vma-policy-hooks 2004-04-10 01:35:05.717387992 -0700 +++ 25-akpm/mm/mprotect.c 2004-04-10 01:35:05.733385560 -0700 @@ -124,6 +124,8 @@ mprotect_attempt_merge(struct vm_area_st return 0; if (vma->vm_file || (vma->vm_flags & VM_SHARED)) return 0; + if (!vma_mpol_equal(vma, prev)) + return 0; /* * If the whole area changes to the protection of the previous one @@ -135,6 +137,7 @@ mprotect_attempt_merge(struct vm_area_st __vma_unlink(mm, vma, prev); spin_unlock(&mm->page_table_lock); + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); mm->map_count--; return 1; @@ -317,12 +320,14 @@ sys_mprotect(unsigned long start, size_t if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) && + vma_mpol_equal(prev, next) && !prev->vm_file && !(prev->vm_flags & VM_SHARED)) { spin_lock(&prev->vm_mm->page_table_lock); prev->vm_end = next->vm_end; __vma_unlink(prev->vm_mm, next, prev); spin_unlock(&prev->vm_mm->page_table_lock); + mpol_free(vma_policy(next)); 
kmem_cache_free(vm_area_cachep, next); prev->vm_mm->map_count--; } _