From: Ingo Molnar

 - fixed the 'else' branch in both filemap_populate() and
   shmem_populate(): we cannot just skip setting the file-pte if the new
   mapping is linear, because a nonlinear entry can be linear just by
   chance as well, so we must overwrite the pte in every case. I've
   pushed one linear/nonlinear optimization into install_file_pte(): if
   the pte is empty _and_ the mapping is linear then we can leave the
   pte alone.

 - fixed MAP_POPULATE: 'prot' is nonzero in all the interesting
   do_mmap() situations, so the old code, which passed 'prot' straight
   to sys_remap_file_pages(), just punted on it - we now pass 0.

 - minor detail: the filemap_getpage() nonblock case now returns NULL
   if a page is present but not uptodate, because in the !uptodate case
   we would start a wait, which is contrary to nonblock.

 - minor detail: cleaned up the exit path of do_no_page().

(A small userspace sketch exercising these code paths is appended after
the patch.)

 25-akpm/include/linux/mm.h |    2 +-
 25-akpm/mm/filemap.c       |   24 ++++++++++--------------
 25-akpm/mm/fremap.c        |   32 ++++++++++++++++++++++++++++----
 25-akpm/mm/memory.c        |    9 ++++-----
 25-akpm/mm/mmap.c          |    2 +-
 25-akpm/mm/shmem.c         |   19 +++++++------------
 6 files changed, 51 insertions(+), 37 deletions(-)

diff -puN include/linux/mm.h~remap_file_pages-fixes-2.6.1-A3 include/linux/mm.h
--- 25/include/linux/mm.h~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/include/linux/mm.h	Mon Jan  5 14:27:10 2004
@@ -441,7 +441,7 @@ extern pmd_t *FASTCALL(__pmd_alloc(struc
 extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
-extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
+extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot, int linear);
 extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
diff -puN mm/filemap.c~remap_file_pages-fixes-2.6.1-A3 mm/filemap.c
--- 25/mm/filemap.c~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/mm/filemap.c	Mon Jan  5 14:27:10 2004
@@ -1206,8 +1206,13 @@ retry_find:
 	 * Ok, found a page in the page cache, now we need to check
 	 * that it's up-to-date.
 	 */
-	if (!PageUptodate(page))
+	if (!PageUptodate(page)) {
+		if (nonblock) {
+			page_cache_release(page);
+			return NULL;
+		}
 		goto page_not_uptodate;
+	}
 
 success:
 	/*
@@ -1300,6 +1305,7 @@ static int filemap_populate(struct vm_ar
 {
 	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
+	int linear = !(vma->vm_flags & VM_NONLINEAR);
 	struct inode *inode = mapping->host;
 	unsigned long size;
 	struct mm_struct *mm = vma->vm_mm;
@@ -1325,19 +1331,9 @@ repeat:
 			return err;
 		}
 	} else {
-		/*
-		 * If a nonlinear mapping then store the file page offset
-		 * in the pte.
-		 */
-		unsigned long pgidx;
-		pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
-		pgidx += vma->vm_pgoff;
-		pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-		if (pgoff != pgidx) {
-			err = install_file_pte(mm, vma, addr, pgoff, prot);
-			if (err)
-				return err;
-		}
+		err = install_file_pte(mm, vma, addr, pgoff, prot, linear);
+		if (err)
+			return err;
 	}
 
 	len -= PAGE_SIZE;
diff -puN mm/fremap.c~remap_file_pages-fixes-2.6.1-A3 mm/fremap.c
--- 25/mm/fremap.c~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/mm/fremap.c	Mon Jan  5 14:27:10 2004
@@ -19,7 +19,7 @@
 #include
 #include
 
-static inline int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
@@ -55,7 +55,7 @@ static inline int zap_pte(struct mm_stru
  * previously existing mapping.
  */
 int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long addr, struct page *page, pgprot_t prot)
+		unsigned long addr, struct page *page, pgprot_t pgprot)
 {
 	int err = -ENOMEM, flush;
 	pte_t *pte;
@@ -78,11 +78,19 @@ int install_page(struct mm_struct *mm, s
 	if (!pte)
 		goto err_unlock;
 
+	/*
+	 * Only install a new page for a non-shared mapping if it's
+	 * not existent yet:
+	 */
+	err = -EEXIST;
+	if (!pte_none(*pte) && !(vma->vm_flags & VM_SHARED))
+		goto err_unlock;
+
 	flush = zap_pte(mm, vma, addr, pte);
 
 	mm->rss++;
 	flush_icache_page(vma, page);
-	set_pte(pte, mk_pte(page, prot));
+	set_pte(pte, mk_pte(page, pgprot));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_val = *pte;
 	pte_unmap(pte);
@@ -107,7 +115,7 @@ EXPORT_SYMBOL(install_page);
  * previously existing mapping.
  */
 int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-	unsigned long addr, unsigned long pgoff, pgprot_t prot)
+	unsigned long addr, unsigned long pgoff, pgprot_t pgprot, int linear)
 {
 	int err = -ENOMEM, flush;
 	pte_t *pte;
@@ -115,6 +123,8 @@ int install_file_pte(struct mm_struct *m
 	pmd_t *pmd;
 	pte_t pte_val;
 
+	BUG_ON(!linear && !(vma->vm_flags & VM_SHARED));
+
 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);
 
@@ -126,6 +136,20 @@ int install_file_pte(struct mm_struct *m
 	if (!pte)
 		goto err_unlock;
 
+	/*
+	 * Skip linear non-existent ptes:
+	 */
+	err = 0;
+	if (linear && pte_none(*pte))
+		goto err_unlock;
+	/*
+	 * Only install a new page for a non-shared mapping if it's
+	 * not existent yet:
+	 */
+	err = -EEXIST;
+	if (!pte_none(*pte) && !(vma->vm_flags & VM_SHARED))
+		goto err_unlock;
+
 	flush = zap_pte(mm, vma, addr, pte);
 
 	set_pte(pte, pgoff_to_pte(pgoff));
diff -puN mm/memory.c~remap_file_pages-fixes-2.6.1-A3 mm/memory.c
--- 25/mm/memory.c~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/mm/memory.c	Mon Jan  5 14:27:10 2004
@@ -1439,9 +1439,10 @@ retry:
 	if (new_page == NOPAGE_OOM)
 		return VM_FAULT_OOM;
 
+	ret = VM_FAULT_MAJOR;
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain)
-		goto oom;
+		goto out;
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -1450,7 +1451,7 @@ retry:
 		struct page * page = alloc_page(GFP_HIGHUSER);
 		if (!page) {
 			page_cache_release(new_page);
-			goto oom;
+			goto out;
 		}
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
@@ -1506,9 +1507,7 @@ retry:
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
 	spin_unlock(&mm->page_table_lock);
-	goto out;
-oom:
-	ret = VM_FAULT_OOM;
+	ret = VM_FAULT_MAJOR;
 out:
 	pte_chain_free(pte_chain);
 	return ret;
diff -puN mm/mmap.c~remap_file_pages-fixes-2.6.1-A3 mm/mmap.c
--- 25/mm/mmap.c~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/mm/mmap.c	Mon Jan  5 14:27:10 2004
@@ -690,7 +690,7 @@ out:
 	}
 	if (flags & MAP_POPULATE) {
 		up_write(&mm->mmap_sem);
-		sys_remap_file_pages(addr, len, prot,
+		sys_remap_file_pages(addr, len, 0,
 					pgoff, flags & MAP_NONBLOCK);
 		down_write(&mm->mmap_sem);
 	}
diff -puN mm/shmem.c~remap_file_pages-fixes-2.6.1-A3 mm/shmem.c
--- 25/mm/shmem.c~remap_file_pages-fixes-2.6.1-A3	Mon Jan  5 14:27:10 2004
+++ 25-akpm/mm/shmem.c	Mon Jan  5 14:27:10 2004
@@ -1002,6 +1002,7 @@ static int shmem_populate(struct vm_area
 	struct mm_struct *mm = vma->vm_mm;
 	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
 	unsigned long size;
+	int linear = !(vma->vm_flags & VM_NONLINEAR);
 
 	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
@@ -1023,20 +1024,14 @@ static int shmem_populate(struct vm_area
 			page_cache_release(page);
 			return err;
 		}
-	} else if (nonblock) {
+	} else {
 		/*
-		 * If a nonlinear mapping then store the file page
-		 * offset in the pte.
+		 * Store the file page offset in the pte:
 		 */
-		unsigned long pgidx;
-		pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
-		pgidx += vma->vm_pgoff;
-		pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-		if (pgoff != pgidx) {
-			err = install_file_pte(mm, vma, addr, pgoff, prot);
-			if (err)
-				return err;
-		}
+		err = install_file_pte(mm, vma, addr,
+						pgoff, prot, linear);
+		if (err)
+			return err;
 	}
 
 	len -= PAGE_SIZE;
_
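
For anyone who wants to exercise these code paths from userspace, here
is a minimal sketch (not part of the patch; the temp file path, page
count and page tags are arbitrary illustration choices). It creates a
MAP_SHARED|MAP_POPULATE mapping, tags each file page through the linear
window, then installs file page 3 at window page 0 with
remap_file_pages(). Note that the syscall's 'prot' argument must be 0 -
which is exactly why the old MAP_POPULATE path, passing do_mmap()'s
nonzero 'prot' straight through, punted.

/* rfp-test.c - illustrative only; build with: gcc -o rfp-test rfp-test.c */
#define _GNU_SOURCE			/* for remap_file_pages() */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

#define PAGES	4

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *map;
	int fd, i;

	fd = open("/tmp/rfp-test", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0 || ftruncate(fd, PAGES * psz) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	/*
	 * MAP_POPULATE prefaults the whole (linear) window up front.
	 * remap_file_pages() below requires a MAP_SHARED mapping.
	 */
	map = mmap(NULL, PAGES * psz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Tag every file page through the linear mapping. */
	for (i = 0; i < PAGES; i++)
		sprintf(map + i * psz, "page %d", i);

	/*
	 * Nonlinear: install file page 3 at window page 0.
	 * 'pgoff' is in pages, 'prot' must be 0.
	 */
	if (remap_file_pages(map, psz, 0, 3, 0) < 0) {
		perror("remap_file_pages");
		return 1;
	}

	printf("window page 0 now reads \"%s\"\n", map);	/* "page 3" */

	munmap(map, PAGES * psz);
	close(fd);
	unlink("/tmp/rfp-test");
	return 0;
}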