From: David Howells The attached patch applies some further fixes and extensions to the nommu mmap implementation: (1) /proc/maps distinguishes shareable private mappings and real shared mappings by marking the former with 's' and the latter with 'S'. (2) Rearrange and optimise the checking portion of do_mmap_pgoff() to make it easier to follow. (3) Only set VM_SHARED on MAP_SHARED mappings. Its presence indicates that the backing memory is supplied by the underlying file or chardev. VM_MAYSHARE indicates that a VMA may be shared if it's a private VMA. The memory for a private VMA is allocated by do_mmap_pgoff() from a kmalloc slab and then the file contents are read into it before returning. (4) Permit MAP_SHARED + PROT_WRITE on memory-backed files[*] and chardevs. The backing fs/chardev, if it is willing, indicates a contiguous area of memory to use when its get_unmapped_area() is called. [*] file->f_mapping->backing_dev_info->memory_backed == 1 (5) Require chardevs and files that support shared mappings to provide a get_unmapped_area() file operation. (6) Make sure a private mapping of /dev/zero is possible. Shared mappings of /dev/zero are not currently supported because this would need greater interaction of mmap with the chardev driver than is currently supported. (7) Add in some extra checks from mm/mmap.c: security, file having write access for a writable shared mapping, file not being in append mode. (8) Only account the mapping memory if it's allocated here; memory belonging to a shared chardev or file is not accounted. With this patch it should be possible to map contiguous flash files directly out of ROM simply by providing get_unmapped_area() for a read-only/shared mapping. I think that it might be worth splitting do_mmap_pgoff() up into smaller subfunctions: one to handle the checking, one to handle shared mappings and one to handle private mappings.
Signed-Off-By: David Howells Signed-off-by: Andrew Morton --- 25-akpm/fs/proc/nommu.c | 2 25-akpm/mm/nommu.c | 182 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 124 insertions(+), 60 deletions(-) diff -puN fs/proc/nommu.c~permit-nommu-map_shared-of-memory-backed-files fs/proc/nommu.c --- 25/fs/proc/nommu.c~permit-nommu-map_shared-of-memory-backed-files Thu Dec 16 15:29:11 2004 +++ 25-akpm/fs/proc/nommu.c Thu Dec 16 15:29:11 2004 @@ -62,7 +62,7 @@ static int nommu_vma_list_show(struct se flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', - flags & VM_MAYSHARE ? 's' : 'p', + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', vma->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); diff -puN mm/nommu.c~permit-nommu-map_shared-of-memory-backed-files mm/nommu.c --- 25/mm/nommu.c~permit-nommu-map_shared-of-memory-backed-files Thu Dec 16 15:29:11 2004 +++ 25-akpm/mm/nommu.c Thu Dec 16 15:29:11 2004 @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include #include @@ -372,31 +375,15 @@ unsigned long do_mmap_pgoff(struct file struct rb_node *rb; unsigned int vm_flags; void *result; - int ret, chrdev; - - /* - * Get the !CONFIG_MMU specific checks done first - */ - chrdev = 0; - if (file) - chrdev = S_ISCHR(file->f_dentry->d_inode->i_mode); - - if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && file && !chrdev) { - printk("MAP_SHARED not completely supported (cannot detect page dirtying)\n"); - return -EINVAL; - } + int ret, membacked; + /* do the simple checks first */ if (flags & MAP_FIXED || addr) { - /* printk("can't do fixed-address/overlay mmap of RAM\n"); */ + printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n", + current->pid); return -EINVAL; } - /* - * now all the standard checks - */ - if (file && (!file->f_op || !file->f_op->mmap)) - return -ENODEV; - if (PAGE_ALIGN(len) == 0) return addr; @@ -407,55 +394,129 @@ unsigned long do_mmap_pgoff(struct 
file if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EINVAL; - /* we're going to need to record the mapping if it works */ - vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL); - if (!vml) - goto error_getting_vml; - memset(vml, 0, sizeof(*vml)); + /* validate file mapping requests */ + membacked = 0; + if (file) { + /* files must support mmap */ + if (!file->f_op || !file->f_op->mmap) + return -ENODEV; + + if ((prot & PROT_EXEC) && + (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)) + return -EPERM; + + /* work out if what we've got could possibly be shared + * - we support chardevs that provide their own "memory" + * - we support files/blockdevs that are memory backed + */ + if (S_ISCHR(file->f_dentry->d_inode->i_mode)) { + membacked = 1; + } + else { + struct address_space *mapping = file->f_mapping; + if (!mapping) + mapping = file->f_dentry->d_inode->i_mapping; + if (mapping && mapping->backing_dev_info) + membacked = mapping->backing_dev_info->memory_backed; + } + + if (flags & MAP_SHARED) { + /* do checks for writing, appending and locking */ + if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE)) + return -EACCES; + + if (IS_APPEND(file->f_dentry->d_inode) && + (file->f_mode & FMODE_WRITE)) + return -EACCES; + + if (locks_verify_locked(file->f_dentry->d_inode)) + return -EAGAIN; + + if (!membacked) { + printk("MAP_SHARED not completely supported on !MMU\n"); + return -EINVAL; + } - /* Do simple checking here so the lower-level routines won't have + /* we require greater support from the driver or + * filesystem - we ask it to tell us what memory to + * use */ + if (!file->f_op->get_unmapped_area) + return -ENODEV; + } + else { + /* we read private files into memory we allocate */ + if (!file->f_op->read) + return -ENODEV; + } + } + + /* handle PROT_EXEC implication by PROT_READ */ + if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) + if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))) + prot |= PROT_EXEC; + + /* do simple 
checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */ | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (!chrdev) { + if (!membacked) { /* share any file segment that's mapped read-only */ if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) || ((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file)) - vm_flags |= VM_SHARED | VM_MAYSHARE; + vm_flags |= VM_MAYSHARE; /* refuse to let anyone share files with this process if it's being traced - * otherwise breakpoints set in it may interfere with another untraced process */ - if (!chrdev && current->ptrace & PT_PTRACED) + if (current->ptrace & PT_PTRACED) vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } else { - /* permit sharing of character devices at any time */ - vm_flags |= VM_MAYSHARE; - if (flags & MAP_SHARED) - vm_flags |= VM_SHARED; + /* permit sharing of character devices and ramfs files at any time for + * anything other than a privately writable mapping + */ + if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) { + vm_flags |= VM_MAYSHARE; + if (flags & MAP_SHARED) + vm_flags |= VM_SHARED; + } } - /* if we want to share, we need to search for VMAs created by another mmap() call that - * overlap with our proposed mapping - * - we can only share with an exact match on regular files - * - shared mappings on character devices are permitted to overlap inexactly as far as we - * are concerned, but in that case, sharing is handled in the driver rather than here - */ + /* allow the security API to have its say */ + ret = security_file_mmap(file, prot, flags); + if (ret) + return ret; + + /* we're going to need to record the mapping if it works */ + vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL); + if (!vml) + goto error_getting_vml; + memset(vml, 0, sizeof(*vml)); + down_write(&nommu_vma_sem); - if (!chrdev && vm_flags & VM_SHARED) { + + 
/* if we want to share, we need to search for VMAs created by another + * mmap() call that overlap with our proposed mapping + * - we can only share with an exact match on most regular files + * - shared mappings on character devices and memory backed files are + * permitted to overlap inexactly as far as we are concerned for in + * these cases, sharing is handled in the driver or filesystem rather + * than here + */ + if (vm_flags & VM_MAYSHARE) { unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; unsigned long vmpglen; for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { vma = rb_entry(rb, struct vm_area_struct, vm_rb); - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) continue; + /* search for overlapping mappings on the same file */ if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode) continue; @@ -466,8 +527,9 @@ unsigned long do_mmap_pgoff(struct file if (pgoff >= vma->vm_pgoff + vmpglen) continue; + /* handle inexact matches between mappings */ if (vmpglen != pglen || vma->vm_pgoff != pgoff) { - if (flags & MAP_SHARED) + if (!membacked) goto sharing_violation; continue; } @@ -481,11 +543,13 @@ unsigned long do_mmap_pgoff(struct file } } + vma = NULL; + /* obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space * - this is the hook for quasi-memory character devices */ - if (file && file->f_op && file->f_op->get_unmapped_area) + if (file && file->f_op->get_unmapped_area) addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR((void *) addr)) { @@ -511,18 +575,12 @@ unsigned long do_mmap_pgoff(struct file vml->vma = vma; - /* - * determine the object being mapped and call the appropriate - * specific mapper. + /* determine the object being mapped and call the appropriate specific + * mapper. 
*/ if (file) { - ret = -ENODEV; - if (!file->f_op) - goto error; - #ifdef MAGIC_ROM_PTR /* First, try simpler routine designed to give us a ROM pointer. */ - if (file->f_op->romptr && !(prot & PROT_WRITE)) { ret = file->f_op->romptr(file, vma); #ifdef DEBUG @@ -536,9 +594,9 @@ unsigned long do_mmap_pgoff(struct file goto error; } else #endif /* MAGIC_ROM_PTR */ - /* Then try full mmap routine, which might return a RAM pointer, - or do something truly complicated. */ - + /* Then try full mmap routine, which might return a RAM + * pointer, or do something truly complicated + */ if (file->f_op->mmap) { ret = file->f_op->mmap(file, vma); @@ -556,11 +614,15 @@ unsigned long do_mmap_pgoff(struct file goto error; } - /* An ENOSYS error indicates that mmap isn't possible (as opposed to - tried but failed) so we'll fall through to the copy. */ + /* An ENOSYS error indicates that mmap isn't possible (as + * opposed to tried but failed) so we'll fall through to the + * copy. */ } - /* allocate some memory to hold the mapping */ + /* allocate some memory to hold the mapping + * - note that this may not return a page-aligned address if the object + * we're allocating is smaller than a page + */ ret = -ENOMEM; result = kmalloc(len, GFP_KERNEL); if (!result) { @@ -602,8 +664,10 @@ unsigned long do_mmap_pgoff(struct file flush_icache_range((unsigned long) result, (unsigned long) result + len); done: - realalloc += kobjsize(result); - askedalloc += len; + if (!(vma->vm_flags & VM_SHARED)) { + realalloc += kobjsize(result); + askedalloc += len; + } realalloc += kobjsize(vma); askedalloc += sizeof(*vma); _