From: Ingo Molnar - enable the 'prot' parameter for shared-writable mappings (the ones which are the primary target for fremap), without breaking up the vma. - added MAP_INHERIT: this both cleanly implements the old sys_remap_file_pages() semantics, and might be useful in its own right as well. The patch adds a new syscall because the old sys_remap_file_pages() syscall was messed up in an unfixable way :-( [the old syscall rejects any nonzero prot, but prot == 0 means PROT_NONE and now we need to be able to express that.] But the patch is ABI-compatible: it keeps the old syscall and wraps it cleanly internally. The patch applies, compiles & boots cleanly on SMP x86 and ppc, and it compiles cleanly on x86_64. Other architectures won't compile; they need to do this: - add MAP_INHERIT - add pte_to_pgprot() - pgoff_to_pte() is replaced by pgoff_prot_to_pte() - chop a bit off __swp_type() if there's no more space. - add the new sys_remap_file_pages() syscall slot - rename the old sys_remap_file_pages() syscall slot to old_remap_file_pages(). It should be quite straightforward for them, but needs to be tested. Other changes: - generate SIGSEGV on PROT_NONE mappings - instead of an infinite pagefault loop. Note to non-x86 arch maintainers: do_page_fault() now needs to handle a VM_FAULT_SIGSEGV return value from handle_pte_fault(), and the pte_to_pgprot() macro has to be added. - fixed the 'else' branch in both filemap_populate and shmem_populate. We cannot just skip setting the file-pte if the new mapping is linear - a nonlinear entry can be linear just by chance as well. So we must overwrite the pte in every case. I've pushed one linear/nonlinear optimization into install_file_pte: if the pte is empty _and_ the mapping is linear then we can leave the pte alone. - fixed MAP_POPULATE: since 'prot' is nonzero in all the interesting do_mmap() situations, the old version of sys_remap_file_pages() just punted on it. - minor detail: the filemap_getpage(nonblock) case now returns NULL if a page is present but not uptodate. 
(because in the !uptodate case we start a wait which is contrary to nonblock.) DESC remap_file_pages error return fix EDESC From: Hugh Dickins sys_remap_file_pages is currently liable to return an inappropriate -EPERM: restore the -EINVAL we prepared earlier. --- 25-akpm/arch/i386/kernel/entry.S | 3 25-akpm/arch/i386/mm/fault.c | 2 25-akpm/arch/x86_64/ia32/ia32entry.S | 2 25-akpm/include/asm-i386/mman.h | 1 25-akpm/include/asm-i386/pgtable-2level.h | 15 ++- 25-akpm/include/asm-i386/pgtable-3level.h | 11 ++ 25-akpm/include/asm-i386/unistd.h | 5 - 25-akpm/include/asm-ppc/mman.h | 1 25-akpm/include/asm-ppc/pgtable.h | 15 ++- 25-akpm/include/asm-x86_64/ia32_unistd.h | 2 25-akpm/include/asm-x86_64/mman.h | 1 25-akpm/include/asm-x86_64/pgtable.h | 12 ++ 25-akpm/include/asm-x86_64/unistd.h | 4 25-akpm/include/linux/mm.h | 12 +- 25-akpm/include/linux/syscalls.h | 5 - 25-akpm/mm/filemap.c | 34 ++++--- 25-akpm/mm/fremap.c | 137 ++++++++++++++++++++---------- 25-akpm/mm/memory.c | 13 ++ 25-akpm/mm/mmap.c | 8 + 25-akpm/mm/rmap.c | 7 + 25-akpm/mm/shmem.c | 19 +--- 21 files changed, 212 insertions(+), 97 deletions(-) diff -puN arch/i386/kernel/entry.S~remap-file-pages-prot-2.6.4-rc1-mm1-A1 arch/i386/kernel/entry.S --- 25/arch/i386/kernel/entry.S~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.181333864 -0800 +++ 25-akpm/arch/i386/kernel/entry.S 2004-04-01 01:03:57.214328848 -0800 @@ -891,7 +891,7 @@ ENTRY(sys_call_table) .long sys_epoll_create .long sys_epoll_ctl /* 255 */ .long sys_epoll_wait - .long sys_remap_file_pages + .long old_remap_file_pages .long sys_set_tid_address .long sys_timer_create .long sys_timer_settime /* 260 */ @@ -914,5 +914,6 @@ ENTRY(sys_call_table) .long sys_mq_timedreceive .long sys_mq_notify .long sys_mq_getsetattr + .long sys_remap_file_pages /* 280 */ syscall_table_size=(.-sys_call_table) diff -puN arch/i386/mm/fault.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 arch/i386/mm/fault.c --- 25/arch/i386/mm/fault.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 
2004-04-01 01:03:57.183333560 -0800 +++ 25-akpm/arch/i386/mm/fault.c 2004-04-01 01:03:57.215328696 -0800 @@ -326,6 +326,8 @@ good_area: goto do_sigbus; case VM_FAULT_OOM: goto out_of_memory; + case VM_FAULT_SIGSEGV: + goto bad_area; default: BUG(); } diff -puN arch/x86_64/ia32/ia32entry.S~remap-file-pages-prot-2.6.4-rc1-mm1-A1 arch/x86_64/ia32/ia32entry.S --- 25/arch/x86_64/ia32/ia32entry.S~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.184333408 -0800 +++ 25-akpm/arch/x86_64/ia32/ia32entry.S 2004-04-01 01:03:57.215328696 -0800 @@ -562,7 +562,7 @@ ia32_sys_call_table: .quad sys_epoll_create .quad sys_epoll_ctl .quad sys_epoll_wait - .quad sys_remap_file_pages + .quad old_remap_file_pages .quad sys_set_tid_address .quad sys32_timer_create .quad compat_timer_settime diff -puN include/asm-i386/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-i386/mman.h --- 25/include/asm-i386/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.186333104 -0800 +++ 25-akpm/include/asm-i386/mman.h 2004-04-01 01:03:57.216328544 -0800 @@ -22,6 +22,7 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_INHERIT 0x20000 /* inherit the protection bits of the underlying vma*/ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_INVALIDATE 2 /* invalidate the caches */ diff -puN include/asm-i386/pgtable-2level.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-i386/pgtable-2level.h --- 25/include/asm-i386/pgtable-2level.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.187332952 -0800 +++ 25-akpm/include/asm-i386/pgtable-2level.h 2004-04-01 01:03:57.216328544 -0800 @@ -64,15 +64,20 @@ static inline pmd_t * pmd_offset(pgd_t * #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) /* - * Bits 0, 6 and 7 are taken, split up the 29 bits of offset + * Bits 0, 1, 6 and 7 are 
taken, split up the 28 bits of offset * into this range: */ -#define PTE_FILE_MAX_BITS 29 +#define PTE_FILE_MAX_BITS 28 #define pte_to_pgoff(pte) \ - ((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 )) + ((((pte).pte_low >> 2) & 0xf ) + (((pte).pte_low >> 8) << 4 )) +#define pte_to_pgprot(pte) \ + __pgprot(((pte).pte_low & (_PAGE_RW | _PAGE_PROTNONE)) \ + | (((pte).pte_low & _PAGE_PROTNONE) ? 0 : \ + (_PAGE_USER | _PAGE_PRESENT)) | _PAGE_ACCESSED) -#define pgoff_to_pte(off) \ - ((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE }) +#define pgoff_prot_to_pte(off, prot) \ + ((pte_t) { (((off) & 0xf) << 2) + (((off) >> 4) << 8) + \ + (pgprot_val(prot) & (_PAGE_RW | _PAGE_PROTNONE)) + _PAGE_FILE }) #endif /* _I386_PGTABLE_2LEVEL_H */ diff -puN include/asm-i386/pgtable-3level.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-i386/pgtable-3level.h --- 25/include/asm-i386/pgtable-3level.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.188332800 -0800 +++ 25-akpm/include/asm-i386/pgtable-3level.h 2004-04-01 01:03:57.217328392 -0800 @@ -120,7 +120,16 @@ static inline pmd_t pfn_pmd(unsigned lon * put the 32 bits of offset into the high part. */ #define pte_to_pgoff(pte) ((pte).pte_high) -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) + +#define pte_to_pgprot(pte) \ + __pgprot(((pte).pte_low & (_PAGE_RW | _PAGE_PROTNONE)) \ + | (((pte).pte_low & _PAGE_PROTNONE) ? 
0 : \ + (_PAGE_USER | _PAGE_PRESENT)) | _PAGE_ACCESSED) + +#define pgoff_prot_to_pte(off, prot) \ + ((pte_t) { _PAGE_FILE + \ + (pgprot_val(prot) & (_PAGE_RW | _PAGE_PROTNONE)) , (off) }) + #define PTE_FILE_MAX_BITS 32 extern struct kmem_cache_s *pae_pgd_cachep; diff -puN include/asm-i386/unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-i386/unistd.h --- 25/include/asm-i386/unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.190332496 -0800 +++ 25-akpm/include/asm-i386/unistd.h 2004-04-01 01:03:57.217328392 -0800 @@ -262,7 +262,7 @@ #define __NR_epoll_create 254 #define __NR_epoll_ctl 255 #define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 +#define __NR_old_remap_file_pages 257 #define __NR_set_tid_address 258 #define __NR_timer_create 259 #define __NR_timer_settime (__NR_timer_create+1) @@ -285,8 +285,9 @@ #define __NR_mq_timedreceive (__NR_mq_open+3) #define __NR_mq_notify (__NR_mq_open+4) #define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_remap_file_pages 280 -#define NR_syscalls 280 +#define NR_syscalls 281 /* user-visible error numbers are in the range -1 - -124: see */ diff -puN include/asm-ppc/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-ppc/mman.h --- 25/include/asm-ppc/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.191332344 -0800 +++ 25-akpm/include/asm-ppc/mman.h 2004-04-01 01:03:57.218328240 -0800 @@ -23,6 +23,7 @@ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_INHERIT 0x20000 /* inherit prot of underlying vma */ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_INVALIDATE 2 /* invalidate the caches */ diff -puN include/asm-ppc/pgtable.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-ppc/pgtable.h --- 25/include/asm-ppc/pgtable.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.192332192 -0800 +++ 
25-akpm/include/asm-ppc/pgtable.h 2004-04-01 01:03:57.218328240 -0800 @@ -264,8 +264,8 @@ extern unsigned long ioremap_bot, iorema /* Definitions for 60x, 740/750, etc. */ #define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ #define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */ #define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_FILE 0x008 /* when !present: nonlinear file mapping */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -628,9 +628,16 @@ extern void add_hash_page(unsigned conte #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) /* Encode and decode a nonlinear file mapping entry */ -#define PTE_FILE_MAX_BITS 29 -#define pte_to_pgoff(pte) (pte_val(pte) >> 3) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) +#define PTE_FILE_MAX_BITS 27 +#define pte_to_pgoff(pte) (((pte_val(pte) & ~0x7ff) >> 5) \ + | ((pte_val(pte) & 0x3f0) >> 4)) +#define pte_to_pgprot(pte) \ +__pgprot((pte_val(pte) & (_PAGE_USER|_PAGE_RW|_PAGE_PRESENT)) | _PAGE_ACCESSED) + +#define pgoff_prot_to_pte(off, prot) \ + ((pte_t) { (((off) << 5) & ~0x7ff) | (((off) << 4) & 0x3f0) \ + | (pgprot_val(prot) & (_PAGE_USER|_PAGE_RW)) \ + | _PAGE_FILE }) /* CONFIG_APUS */ /* For virtual address to physical address conversion */ diff -puN include/asm-x86_64/ia32_unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-x86_64/ia32_unistd.h --- 25/include/asm-x86_64/ia32_unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.194331888 -0800 +++ 25-akpm/include/asm-x86_64/ia32_unistd.h 2004-04-01 01:03:57.219328088 -0800 @@ -262,7 +262,7 @@ #define __NR_ia32_sys_epoll_create 254 #define __NR_ia32_sys_epoll_ctl 255 #define __NR_ia32_sys_epoll_wait 256 -#define __NR_ia32_remap_file_pages 257 
+#define __NR_ia32_old_remap_file_pages 257 #define __NR_ia32_set_tid_address 258 #define __NR_ia32_timer_create 259 #define __NR_ia32_timer_settime (__NR_ia32_timer_create+1) diff -puN include/asm-x86_64/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-x86_64/mman.h --- 25/include/asm-x86_64/mman.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.195331736 -0800 +++ 25-akpm/include/asm-x86_64/mman.h 2004-04-01 01:03:57.219328088 -0800 @@ -23,6 +23,7 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_INHERIT 0x20000 /* inherit the protection bits of the underlying vma*/ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_INVALIDATE 2 /* invalidate the caches */ diff -puN include/asm-x86_64/pgtable.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-x86_64/pgtable.h --- 25/include/asm-x86_64/pgtable.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.196331584 -0800 +++ 25-akpm/include/asm-x86_64/pgtable.h 2004-04-01 01:03:57.220327936 -0800 @@ -344,9 +344,19 @@ static inline pgd_t *current_pgd_offset_ #define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT +#define pte_to_pgprot(pte) \ + __pgprot((pte_val(pte) & (_PAGE_RW | _PAGE_PROTNONE)) \ + | ((pte_val(pte) & _PAGE_PROTNONE) ? 0 : \ + (_PAGE_USER | _PAGE_PRESENT)) | _PAGE_ACCESSED) + +#define pgoff_prot_to_pte(off, prot) \ + ((pte_t) { _PAGE_FILE + \ + (pgprot_val(prot) & (_PAGE_RW | _PAGE_PROTNONE)) + \ + ((off) << PAGE_SHIFT) }) + + /* PTE - Level 1 access. 
*/ /* page, protection -> pte */ diff -puN include/asm-x86_64/unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/asm-x86_64/unistd.h --- 25/include/asm-x86_64/unistd.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.198331280 -0800 +++ 25-akpm/include/asm-x86_64/unistd.h 2004-04-01 01:03:57.221327784 -0800 @@ -490,8 +490,8 @@ __SYSCALL(__NR_epoll_create, sys_epoll_c __SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall) #define __NR_epoll_wait_old 215 __SYSCALL(__NR_epoll_wait_old, sys_ni_syscall) -#define __NR_remap_file_pages 216 -__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) +#define __NR_old_remap_file_pages 216 +__SYSCALL(__NR_old_remap_file_pages, old_remap_file_pages) #define __NR_getdents64 217 __SYSCALL(__NR_getdents64, sys_getdents64) #define __NR_set_tid_address 218 diff -puN include/linux/mm.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/linux/mm.h --- 25/include/linux/mm.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.200330976 -0800 +++ 25-akpm/include/linux/mm.h 2004-04-01 01:03:57.222327632 -0800 @@ -423,10 +423,11 @@ static inline int page_mapped(struct pag * Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. 
*/ -#define VM_FAULT_OOM (-1) -#define VM_FAULT_SIGBUS 0 -#define VM_FAULT_MINOR 1 -#define VM_FAULT_MAJOR 2 +#define VM_FAULT_OOM (-1) +#define VM_FAULT_SIGBUS 0 +#define VM_FAULT_MINOR 1 +#define VM_FAULT_MAJOR 2 +#define VM_FAULT_SIGSEGV 3 #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) @@ -459,9 +460,10 @@ extern pmd_t *FASTCALL(__pmd_alloc(struc extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); -extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); +extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot, int linear); extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); +extern long __remap_file_pages(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); void put_dirty_page(struct task_struct *tsk, struct page *page, unsigned long address, pgprot_t prot); diff -puN include/linux/syscalls.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 include/linux/syscalls.h --- 25/include/linux/syscalls.h~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.201330824 -0800 +++ 25-akpm/include/linux/syscalls.h 2004-04-01 01:03:57.223327480 -0800 @@ -254,7 +254,10 @@ asmlinkage long sys_mprotect(unsigned lo asmlinkage unsigned long sys_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, 
unsigned long new_addr); -long sys_remap_file_pages(unsigned long start, unsigned long size, +asmlinkage long old_remap_file_pages(unsigned long start, unsigned long size, + unsigned long __prot, unsigned long pgoff, + unsigned long flags); +asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); asmlinkage long sys_msync(unsigned long start, size_t len, int flags); diff -puN mm/filemap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/filemap.c --- 25/mm/filemap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.203330520 -0800 +++ 25-akpm/mm/filemap.c 2004-04-01 01:03:57.224327328 -0800 @@ -39,6 +39,7 @@ #include #include +#include /* * Shared mappings implemented 30.11.1994. It's not fully working yet, @@ -1237,8 +1238,13 @@ retry_find: * Ok, found a page in the page cache, now we need to check * that it's up-to-date. */ - if (!PageUptodate(page)) + if (!PageUptodate(page)) { + if (nonblock) { + page_cache_release(page); + return NULL; + } goto page_not_uptodate; + } success: /* @@ -1331,12 +1337,22 @@ static int filemap_populate(struct vm_ar { struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; + int linear = !(vma->vm_flags & VM_NONLINEAR); struct inode *inode = mapping->host; unsigned long size; struct mm_struct *mm = vma->vm_mm; struct page *page; int err; + /* + * mapping-removal fastpath: + */ + if ((vma->vm_flags & VM_SHARED) && + (pgprot_val(prot) == pgprot_val(PAGE_NONE))) { + zap_page_range(vma, addr, len); + return 0; + } + if (!nonblock) force_page_cache_readahead(mapping, vma->vm_file, pgoff, len >> PAGE_CACHE_SHIFT); @@ -1356,19 +1372,9 @@ repeat: return err; } } else { - /* - * If a nonlinear mapping then store the file page offset - * in the pte. 
- */ - unsigned long pgidx; - pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (pgoff != pgidx) { - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; - } + err = install_file_pte(mm, vma, addr, pgoff, prot, linear); + if (err) + return err; } len -= PAGE_SIZE; diff -puN mm/fremap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/fremap.c --- 25/mm/fremap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.204330368 -0800 +++ 25-akpm/mm/fremap.c 2004-04-01 01:04:09.422472928 -0800 @@ -53,7 +53,7 @@ static inline void zap_pte(struct mm_str * previously existing mapping. */ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, struct page *page, pgprot_t prot) + unsigned long addr, struct page *page, pgprot_t pgprot) { int err = -ENOMEM; pte_t *pte; @@ -76,11 +76,19 @@ int install_page(struct mm_struct *mm, s if (!pte) goto err_unlock; + /* + * Only install a new page for a non-shared mapping if it's + * not existent yet: + */ + err = -EEXIST; + if (!pte_none(*pte) && !(vma->vm_flags & VM_SHARED)) + goto err_unlock; + zap_pte(mm, vma, addr, pte); mm->rss++; flush_icache_page(vma, page); - set_pte(pte, mk_pte(page, prot)); + set_pte(pte, mk_pte(page, pgprot)); pte_chain = page_add_rmap(page, pte, pte_chain); pte_val = *pte; pte_unmap(pte); @@ -103,7 +111,7 @@ EXPORT_SYMBOL(install_page); * previously existing mapping. 
*/ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long pgoff, pgprot_t prot) + unsigned long addr, unsigned long pgoff, pgprot_t pgprot, int linear) { int err = -ENOMEM; pte_t *pte; @@ -111,6 +119,8 @@ int install_file_pte(struct mm_struct *m pmd_t *pmd; pte_t pte_val; + BUG_ON(!linear && !(vma->vm_flags & VM_SHARED)); + pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); @@ -121,10 +131,23 @@ int install_file_pte(struct mm_struct *m pte = pte_alloc_map(mm, pmd, addr); if (!pte) goto err_unlock; + /* + * Skip linear non-existent ptes: + */ + err = 0; + if (linear && pte_none(*pte)) + goto err_unlock; + /* + * Only install a new page for a non-shared mapping if it's + * not existent yet: + */ + err = -EEXIST; + if (!pte_none(*pte) && !(vma->vm_flags & VM_SHARED)) + goto err_unlock; zap_pte(mm, vma, addr, pte); - set_pte(pte, pgoff_to_pte(pgoff)); + set_pte(pte, pgoff_prot_to_pte(pgoff, pgprot)); pte_val = *pte; pte_unmap(pte); update_mmu_cache(vma, addr, pte_val); @@ -144,27 +167,22 @@ err_unlock: * @size: size of the remapped virtual memory range * @prot: new protection bits of the range * @pgoff: to be mapped page of the backing store file - * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO. + * @flags: bits MAP_INHERIT or MAP_NONBLOCKED - the later will cause no IO. * * this syscall works purely via pagetables, so it's the most efficient * way to map the same (large) file into a given virtual window. Unlike * mmap()/mremap() it does not create any new vmas. The new mappings are * also safe across swapout. - * - * NOTE: the 'prot' parameter right now is ignored, and the vma's default - * protection is used. Arbitrary protections might be implemented in the - * future. 
*/ -asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long __prot, unsigned long pgoff, unsigned long flags) +long __remap_file_pages(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long prot, + unsigned long pgoff, unsigned long flags) { - struct mm_struct *mm = current->mm; + pgprot_t pgprot = protection_map[calc_vm_prot_bits(prot) | VM_SHARED]; unsigned long end = start + size; struct vm_area_struct *vma; int err = -EINVAL; - if (__prot) - return err; /* * Sanitize the syscall parameters: */ @@ -184,37 +202,72 @@ asmlinkage long sys_remap_file_pages(uns /* We need down_write() to change vma->vm_flags. */ down_write(&mm->mmap_sem); vma = find_vma(mm, start); - /* - * Make sure the vma is shared, that it supports prefaulting, - * and that the remapped range is valid and fully within - * the single existing vma: - */ - if (vma && (vma->vm_flags & VM_SHARED) && - vma->vm_ops && vma->vm_ops->populate && - end > start && start >= vma->vm_start && - end <= vma->vm_end) { - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff) - vma->vm_flags |= VM_NONLINEAR; - - /* ->populate can take a long time, so downgrade the lock. */ - downgrade_write(&mm->mmap_sem); - err = vma->vm_ops->populate(vma, start, size, - vma->vm_page_prot, - pgoff, flags & MAP_NONBLOCK); - - /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). - */ - up_read(&mm->mmap_sem); - } else { - up_write(&mm->mmap_sem); + * Make sure the permissions are right, the vma is shared + * (or linearly remapped - ie. 
prefaulted), that it supports + * prefaulting, and that the remapped range is valid and fully + * within the single existing vma: + */ + if (!vma) + goto out_unlock; + if (unlikely(flags & MAP_INHERIT)) + pgprot = vma->vm_page_prot; + else { + err = -EPERM; + if (((prot & PROT_READ) && !(vma->vm_flags & VM_MAYREAD))) + goto out_unlock; + if (((prot & PROT_WRITE) && !(vma->vm_flags & VM_MAYWRITE))) + goto out_unlock; + if (((prot & PROT_EXEC) && !(vma->vm_flags & VM_MAYEXEC))) + goto out_unlock; + err = -EINVAL; + } + + if (!vma->vm_ops || !vma->vm_ops->populate || end <= start || + start < vma->vm_start || end > vma->vm_end) + goto out_unlock; + + if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff) { + if (!(vma->vm_flags & VM_SHARED)) + goto out_unlock; + vma->vm_flags |= VM_NONLINEAR; } + /* + * ->populate can take a long time, so downgrade the lock: + */ + downgrade_write(&mm->mmap_sem); + err = vma->vm_ops->populate(vma, start, size, + pgprot, pgoff, flags & MAP_NONBLOCK); + + /* + * We can't clear VM_NONLINEAR because we'd have to do + * it after ->populate completes, and that would prevent + * downgrading the lock. (Locks can't be upgraded). + */ + up_read(&mm->mmap_sem); + return err; + +out_unlock: + up_write(&mm->mmap_sem); return err; } +asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, + unsigned long prot, unsigned long pgoff, unsigned long flags) +{ + return __remap_file_pages(current->mm, start, size, prot, pgoff, flags); +} + +/* + * sys_remap_file_pages - the old API. Implies MAP_INHERIT. 
+ */ +asmlinkage long old_remap_file_pages(unsigned long start, unsigned long size, + unsigned long __prot, unsigned long pgoff, unsigned long flags) +{ + if (__prot) + return -EINVAL; + + return __remap_file_pages(current->mm, start, size, PROT_NONE, + pgoff, flags | MAP_INHERIT); +} diff -puN mm/memory.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/memory.c --- 25/mm/memory.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.206330064 -0800 +++ 25-akpm/mm/memory.c 2004-04-01 01:03:57.227326872 -0800 @@ -1534,6 +1534,7 @@ static int do_file_page(struct mm_struct unsigned long address, int write_access, pte_t *pte, pmd_t *pmd) { unsigned long pgoff; + pgprot_t pgprot; int err; BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage); @@ -1548,11 +1549,12 @@ static int do_file_page(struct mm_struct } pgoff = pte_to_pgoff(*pte); + pgprot = pte_to_pgprot(*pte); pte_unmap(pte); spin_unlock(&mm->page_table_lock); - err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0); + err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, pgprot, pgoff, 0); if (err == -ENOMEM) return VM_FAULT_OOM; if (err) @@ -1601,6 +1603,15 @@ static inline int handle_pte_fault(struc return do_swap_page(mm, vma, address, pte, pmd, entry, write_access); } + /* + * Generate a SIGSEGV if a PROT_NONE page is accessed: + */ + if (pgprot_val(pte_to_pgprot(entry)) == pgprot_val(__P000)) { + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); + return VM_FAULT_SIGSEGV; + } + if (write_access) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, pmd, entry); diff -puN mm/mmap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/mmap.c --- 25/mm/mmap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.207329912 -0800 +++ 25-akpm/mm/mmap.c 2004-04-01 01:03:57.229326568 -0800 @@ -715,8 +715,12 @@ out: } if (flags & MAP_POPULATE) { up_write(&mm->mmap_sem); - sys_remap_file_pages(addr, len, prot, - pgoff, flags & MAP_NONBLOCK); + /* + * remap_file_pages() 
works even if the mapping is private, + * in the linearly-mapped case: + */ + __remap_file_pages(mm, addr, len, PROT_NONE, pgoff, + MAP_INHERIT | (flags & MAP_NONBLOCK)); down_write(&mm->mmap_sem); } return addr; diff -puN mm/rmap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/rmap.c --- 25/mm/rmap.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.209329608 -0800 +++ 25-akpm/mm/rmap.c 2004-04-01 01:03:57.230326416 -0800 @@ -343,6 +343,7 @@ static int fastcall try_to_unmap_one(str BUG_ON(pte_file(*ptep)); } else { unsigned long pgidx; + pgprot_t pgprot = pte_to_pgprot(pte); /* * If a nonlinear mapping then store the file page offset * in the pte. @@ -350,8 +351,10 @@ static int fastcall try_to_unmap_one(str pgidx = (address - vma->vm_start) >> PAGE_SHIFT; pgidx += vma->vm_pgoff; pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (page->index != pgidx) { - set_pte(ptep, pgoff_to_pte(page->index)); + if (page->index != pgidx || + pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) { + + set_pte(ptep, pgoff_prot_to_pte(page->index, pgprot)); BUG_ON(!pte_file(*ptep)); } } diff -puN mm/shmem.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 mm/shmem.c --- 25/mm/shmem.c~remap-file-pages-prot-2.6.4-rc1-mm1-A1 2004-04-01 01:03:57.210329456 -0800 +++ 25-akpm/mm/shmem.c 2004-04-01 01:03:57.231326264 -0800 @@ -1029,6 +1029,7 @@ static int shmem_populate(struct vm_area struct mm_struct *mm = vma->vm_mm; enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; unsigned long size; + int linear = !(vma->vm_flags & VM_NONLINEAR); size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) @@ -1050,20 +1051,14 @@ static int shmem_populate(struct vm_area page_cache_release(page); return err; } - } else if (nonblock) { + } else { /* - * If a nonlinear mapping then store the file page - * offset in the pte. 
+ * Store the file page offset in the pte: */ - unsigned long pgidx; - pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (pgoff != pgidx) { - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; - } + err = install_file_pte(mm, vma, addr, + pgoff, prot, linear); + if (err) + return err; } len -= PAGE_SIZE; _