From: Hugh Dickins

Here's a second patch to add to the first: mremap's cows can't come home
without releasing the i_shared_lock, so better to move the whole "Subtle
point" locking from move_vma into move_page_tables.  And it's possible for
the file that was behind an anonymous page to be truncated while we drop
that lock, so we don't want to abort mremap because of VM_FAULT_SIGBUS.

(Eek, should we be checking do_swap_page of a vm_file area against the
truncate_count sequence?  Technically yes, but I doubt we need bother.)
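To see the retry shape in isolation (a userspace toy, not kernel code):
below is a compilable sketch of the pattern move_page_tables takes in the
diff that follows.  Hold the spinlock across the copy loop, drop it around
the potentially blocking COW break, relock, rewind and retry the same page.
truncate_lock, copy_one_pte() and break_cow() are made-up stand-ins for
i_shared_lock, the per-page move and make_page_exclusive().

/* cow-retry.c: build with `cc -pthread cow-retry.c` */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define PAGE	4096UL

static pthread_spinlock_t truncate_lock;	/* stand-in for i_shared_lock */
static int page2_shared = 1;			/* pretend page 2 is COW-shared */

static int copy_one_pte(unsigned long off)	/* stand-in for the per-page move */
{
	return (off == 2 * PAGE && page2_shared) ? -EAGAIN : 0;
}

static int break_cow(unsigned long off)		/* stand-in for make_page_exclusive */
{
	(void)off;
	page2_shared = 0;	/* a real COW break may sleep, or fail -ENOMEM */
	return 0;
}

static unsigned long move_ptes(unsigned long len, int *cows)
{
	unsigned long offset;
	int ret = 0;

	pthread_spin_lock(&truncate_lock);
	for (offset = 0; offset < len; offset += PAGE) {
		ret = copy_one_pte(offset);
		if (ret == -EAGAIN) {
			/* Mustn't sleep while spinning: drop, break COW, relock. */
			pthread_spin_unlock(&truncate_lock);
			ret = break_cow(offset);
			pthread_spin_lock(&truncate_lock);
			offset -= PAGE;		/* redo this page next time round */
			(*cows)++;
		}
		if (ret)
			break;
	}
	pthread_spin_unlock(&truncate_lock);
	return offset;		/* how far we got, in bytes */
}

int main(void)
{
	int cows = 0;
	unsigned long moved = move_ptes(8 * PAGE, &cows);

	printf("moved %lu bytes, broke %d cows\n", moved, cows);
	return 0;
}

The point is that breaking the COW faults the page in, which may sleep, and
sleeping under a spinlock is forbidden; hence the unlock/relock pair, and
the offset rewind so the same page is moved once it has been made exclusive.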
---

 25-akpm/include/linux/rmap.h |   10 ++--------
 25-akpm/mm/mremap.c          |   34 ++++++++++++++++++++--------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff -puN include/linux/rmap.h~i_shared_lock-fix-2 include/linux/rmap.h
--- 25/include/linux/rmap.h~i_shared_lock-fix-2	Wed Apr 21 14:21:46 2004
+++ 25-akpm/include/linux/rmap.h	Wed Apr 21 14:21:46 2004
@@ -69,15 +69,9 @@ static inline int mremap_moved_anon_rmap
 static inline int make_page_exclusive(struct vm_area_struct *vma,
 					unsigned long addr)
 {
-	switch (handle_mm_fault(vma->vm_mm, vma, addr, 1)) {
-	case VM_FAULT_MINOR:
-	case VM_FAULT_MAJOR:
+	if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
 		return 0;
-	case VM_FAULT_OOM:
-		return -ENOMEM;
-	default:
-		return -EFAULT;
-	}
+	return -ENOMEM;
 }
 
 /*
diff -puN mm/mremap.c~i_shared_lock-fix-2 mm/mremap.c
--- 25/mm/mremap.c~i_shared_lock-fix-2	Wed Apr 21 14:21:46 2004
+++ 25-akpm/mm/mremap.c	Wed Apr 21 14:21:46 2004
@@ -143,10 +143,22 @@ static int move_page_tables(struct vm_ar
 		unsigned long new_addr, unsigned long old_addr,
 		unsigned long len, int *cows)
 {
+	struct address_space *mapping = NULL;
 	unsigned long offset;
 
 	flush_cache_range(vma, old_addr, old_addr + len);
 
+	if (vma->vm_file) {
+		/*
+		 * Subtle point from Rajesh Venkatasubramanian: before
+		 * moving file-based ptes, we must lock vmtruncate out,
+		 * since it might clean the dst vma before the src vma,
+		 * and we propagate stale pages into the dst afterward.
+		 */
+		mapping = vma->vm_file->f_mapping;
+		spin_lock(&mapping->i_shared_lock);
+	}
+
 	/*
 	 * This is not the clever way to do this, but we're taking the
 	 * easy way out on the assumption that most remappings will be
@@ -163,14 +175,21 @@ static int move_page_tables(struct vm_ar
 		 * brought back in (if it's still shared by then).
 		 */
 		if (ret == -EAGAIN) {
+			if (mapping)
+				spin_unlock(&mapping->i_shared_lock);
+			cond_resched();
 			ret = make_page_exclusive(vma, old_addr+offset);
+			if (mapping)
+				spin_lock(&mapping->i_shared_lock);
 			offset -= PAGE_SIZE;
 			(*cows)++;
 		}
 		if (ret)
 			break;
-		cond_resched();
 	}
+
+	if (mapping)
+		spin_unlock(&mapping->i_shared_lock);
 	return offset;
 }
 
@@ -179,7 +198,6 @@ static unsigned long move_vma(struct vm_
 		unsigned long new_len, unsigned long new_addr)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct address_space *mapping = NULL;
 	struct vm_area_struct *new_vma;
 	unsigned long vm_flags = vma->vm_flags;
 	unsigned long new_pgoff;
@@ -200,16 +218,6 @@ static unsigned long move_vma(struct vm_
 	if (!new_vma)
 		return -ENOMEM;
 
-	if (vma->vm_file) {
-		/*
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock vmtruncate out,
-		 * since it might clean the dst vma before the src vma,
-		 * and we propagate stale pages into the dst afterward.
-		 */
-		mapping = vma->vm_file->f_mapping;
-		spin_lock(&mapping->i_shared_lock);
-	}
 	moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
 	if (moved_len < old_len) {
 		/*
@@ -226,8 +234,6 @@ static unsigned long move_vma(struct vm_
 	if (cows)
 		/* Downgrade or remove this message later */
 		printk(KERN_WARNING "%s: mremap moved %d cows\n",
 					current->comm, cows);
-	if (mapping)
-		spin_unlock(&mapping->i_shared_lock);
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT) {
_
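
To make the "Subtle point" comment concrete: here is a toy, deterministic
replay of the interleaving that holding i_shared_lock rules out.  vmtruncate
cleans the dst vma, mremap then moves the ptes from src to dst, and
truncate's pass over the src vma comes too late.  truncate_vma() and
move_page() are made-up stand-ins, and int arrays stand in for ptes; this
models the ordering only, nothing else.

/* race-demo.c: replay of the bad interleaving, single-threaded on purpose */
#include <stdio.h>
#include <string.h>

#define NPAGES 4
static int src[NPAGES], dst[NPAGES];	/* nonzero = page present */

static void truncate_vma(int *vma)	/* stand-in for vmtruncate's pte cleaning */
{
	memset(vma, 0, sizeof(int) * NPAGES);
}

static void move_page(int i)		/* stand-in for the per-page move */
{
	dst[i] = src[i];
	src[i] = 0;
}

int main(void)
{
	int i;

	for (i = 0; i < NPAGES; i++)
		src[i] = i + 1;		/* file pages currently live in src */

	truncate_vma(dst);		/* truncate cleans the dst vma first... */
	for (i = 0; i < NPAGES; i++)
		move_page(i);		/* ...mremap moves the ptes meanwhile... */
	truncate_vma(src);		/* ...then truncate cleans src: too late */

	for (i = 0; i < NPAGES; i++)
		if (dst[i])
			printf("stale page %d survived truncation\n", i);
	return 0;
}

Run it and every page survives in dst even though the whole file was
"truncated".  With the lock held across the move, truncate sees the ptes
either still in src or already in dst, but never misses them in both.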