From: Ingo Molnar

i've attached prefault-2.6.0-A0, which:

 - enables prefaulting/populating of private-shared (including
   shared-readonly) mappings. This is the main mapping method of ld.so.
   The cached startup time of larger GUI apps on a P4 box got reduced
   by 3-4%.

this patch is on top of remap-file-pages-prot-2.6.0-H1.

i've test-booted this patch on x86 SMP and UP, and it works fine for a
wide range of workloads.

 25-akpm/mm/filemap.c |  109 +++++++++++++++++++++++++++++++++++++++++++++++++++
 25-akpm/mm/memory.c  |   18 ++++++++
 2 files changed, 127 insertions(+)

diff -puN mm/filemap.c~prefault-2.6.0-A0 mm/filemap.c
--- 25/mm/filemap.c~prefault-2.6.0-A0	Mon Jan  5 14:27:14 2004
+++ 25-akpm/mm/filemap.c	Mon Jan  5 14:27:14 2004
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 /*
  * This is needed for the following functions:
  *  - try_to_release_page
@@ -1296,6 +1297,111 @@ err:
 	return NULL;
 }
 
+#define NR_PAGES 8
+
+/*
+ * Use gang lookup in the nonblock case and opencode the whole
+ * prefaulting loop which allows all sorts of shortcuts:
+ */
+static int filemap_populate_nonblock(struct vm_area_struct *vma,
+			unsigned long addr,
+			unsigned long len,
+			pgprot_t prot,
+			unsigned long pgoff)
+{
+	struct file *file = vma->vm_file;
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long size, nr_pages, left, range;
+	struct mm_struct *mm = vma->vm_mm;
+	struct page **tmp, *pages[NR_PAGES];
+	struct pte_chain *pte_chain = NULL;
+	pte_t *pte0 = NULL, *pte = NULL;
+	pgd_t *pgd;
+	pmd_t *pmd;
+
+	range = len >> PAGE_CACHE_SHIFT;
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (pgoff + range > size)
+		return -EINVAL;
+
+	spin_lock(&mm->page_table_lock);
+	spin_lock(&mapping->page_lock);
+
+	while (range) {
+		left = (PMD_SIZE - (addr & ~PMD_MASK));
+		left >>= PAGE_CACHE_SHIFT;
+		if (!left) {
+			if (pte0)
+				pte_unmap(pte0);
+			pte0 = NULL;
+		}
+		if (!pte0) {
+			pgd = pgd_offset(mm, addr);
+			pmd = pmd_alloc(mm, pgd, addr);
+			if (!pmd)
+				goto err_unlock;
+			pte0 = pte = pte_alloc_map(mm, pmd, addr);
+			if (!pte)
+				goto err_unlock;
+		}
+
+		if (left > range)
+			left = range;
+		if (left > NR_PAGES)
+			left = NR_PAGES;
+
+		tmp = pages;
+		range -= left;
+
+		/*
+		 * Look up a block of pages that are cached already,
+		 * and install them. Performance-wise we assume that
+		 * there are blocks of cached pages in this range, if
+		 * there are none then we skip over those holes.
+		 */
+		nr_pages = radix_tree_gang_lookup(&mapping->page_tree,
+					(void **)pages, pgoff, left);
+		while (left) {
+			left--;
+			if (nr_pages && ((*tmp)->index == pgoff) &&
+				PageUptodate(*tmp) && (pte_none(*pte))) {
+
+				if (!pte_chain) {
+					pte_chain = pte_chain_alloc(GFP_ATOMIC);
+					if (unlikely(!pte_chain))
+						goto err_unlock;
+				}
+				page_cache_get(*tmp);
+
+				mm->rss++;
+				flush_icache_page(vma, *tmp);
+				set_pte(pte, mk_pte(*tmp, prot));
+				pte_chain = page_add_rmap(*tmp, pte, pte_chain);
+				update_mmu_cache(vma, addr, *pte);
+
+				tmp++;
+				nr_pages--;
+			}
+			pgoff++;
+			addr += PAGE_SIZE;
+			pte++;
+		}
+	}
+	if (pte0)
+		pte_unmap(pte0);
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&mm->page_table_lock);
+	pte_chain_free(pte_chain);
+	return 0;
+
+err_unlock:
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&mm->page_table_lock);
+	pte_chain_free(pte_chain);
+	return -ENOMEM;
+}
+
 static int filemap_populate(struct vm_area_struct *vma,
 			unsigned long addr,
 			unsigned long len,
@@ -1312,6 +1418,9 @@ static int filemap_populate(struct vm_ar
 	struct page *page;
 	int err;
 
+	if (linear && nonblock)
+		return filemap_populate_nonblock(vma, addr, len, prot, pgoff);
+
 	if (!nonblock)
 		force_page_cache_readahead(mapping, vma->vm_file,
 					pgoff, len >> PAGE_CACHE_SHIFT);
diff -puN mm/memory.c~prefault-2.6.0-A0 mm/memory.c
--- 25/mm/memory.c~prefault-2.6.0-A0	Mon Jan  5 14:27:14 2004
+++ 25-akpm/mm/memory.c	Mon Jan  5 14:27:14 2004
@@ -1396,6 +1396,8 @@ out:
 	return ret;
 }
 
+#define PREFAULT_PAGES 7
+
 /*
  * do_no_page() tries to create a new page mapping. It aggressively
  * tries to share with existing pages, but makes a separate copy if
@@ -1439,6 +1441,22 @@ retry:
 	if (new_page == NOPAGE_OOM)
 		return VM_FAULT_OOM;
 
+	if (vma->vm_ops && vma->vm_ops->populate &&
+			!(vma->vm_flags & VM_NONLINEAR)) {
+
+		unsigned long start = (address & PAGE_MASK) + PAGE_SIZE,
+			end = start + PAGE_SIZE * PREFAULT_PAGES, size, pgoff;
+		int nr_pages;
+
+		pgoff = ((start - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+		if (vma->vm_end < end)
+			end = vma->vm_end;
+		nr_pages = (end - start) / PAGE_SIZE;
+		size = nr_pages * PAGE_SIZE;
+
+		if (nr_pages)
+			vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, pgoff, MAP_NONBLOCK);
+	}
 	ret = VM_FAULT_MAJOR;
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain)
_
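
For illustration only (not part of the patch): a minimal userspace sketch
of the ld.so-style workload the prefaulting targets, i.e. a linear,
read-only, MAP_PRIVATE file mapping that is touched page by page. With
the patch applied and the file already in the page cache, a major fault
on one page should also populate up to PREFAULT_PAGES of the following
cached pages, so only roughly every 8th touched page should go through
do_no_page(). The file path below is just an example.

/* prefault-demo.c -- illustrative only, not part of the patch.
 * Maps a file the way ld.so maps a shared library: read-only,
 * MAP_PRIVATE, linear.  Touching the pages goes through the
 * filemap_populate()/do_no_page() path that the patch augments
 * with filemap_populate_nonblock() prefaulting of cached pages.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/lib/libc.so.6";	/* example file, adjust as needed */
	struct stat st;
	unsigned char sum = 0;
	long page = sysconf(_SC_PAGESIZE);
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(path);
		return EXIT_FAILURE;
	}

	/* private, read-only, linear mapping -- the ld.so pattern */
	unsigned char *map = mmap(NULL, st.st_size, PROT_READ,
				  MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* touch every page of the mapping; with the prefault patch and a
	   cached file, most of these accesses should hit ptes that were
	   installed by the nonblock populate path rather than faulting */
	for (off_t off = 0; off < st.st_size; off += page)
		sum += map[off];

	printf("touched %ld pages (checksum %u)\n",
	       (long)((st.st_size + page - 1) / page), sum);
	munmap(map, st.st_size);
	close(fd);
	return EXIT_SUCCESS;
}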
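
Also for illustration, a standalone restatement of the window arithmetic
in the do_no_page() hunk, with assumed values (4K pages, a vma that is
only 4 pages long, made-up addresses); this is plain userspace C, not
kernel code:

/* prefault-window.c -- re-statement of the do_no_page() prefault
 * window computation, for illustration only.  PAGE_SIZE and the vma
 * layout below are assumptions, not taken from the kernel.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PREFAULT_PAGES	7

int main(void)
{
	unsigned long vm_start = 0x40000000UL;	/* assumed vma start */
	unsigned long vm_end   = 0x40004000UL;	/* vma is 4 pages long */
	unsigned long vm_pgoff = 0;
	unsigned long address  = 0x40000123UL;	/* faulting address */

	/* same steps as the patch: the window starts one page past the fault */
	unsigned long start = (address & PAGE_MASK) + PAGE_SIZE;
	unsigned long end = start + PAGE_SIZE * PREFAULT_PAGES;
	unsigned long pgoff = ((start - vm_start) >> PAGE_SHIFT) + vm_pgoff;

	if (vm_end < end)		/* clamp the window to the vma */
		end = vm_end;

	unsigned long nr_pages = (end - start) / PAGE_SIZE;

	/* with the values above the window is clamped to vm_end, so only
	   3 pages get populated, starting at file offset (pgoff) 1 */
	printf("populate %lu pages from pgoff %lu\n", nr_pages, pgoff);
	return 0;
}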