Index: linux-2.6.9/include/linux/sched.h
===================================================================
--- linux-2.6.9.orig/include/linux/sched.h	2004-12-01 13:34:54.000000000 -0800
+++ linux-2.6.9/include/linux/sched.h	2004-12-01 13:34:56.000000000 -0800
@@ -541,6 +541,8 @@
 #endif

 	struct list_head tasks;
+	unsigned long anon_fault_next_addr;
+	int anon_fault_order;
 	/*
 	 * ptrace_list/ptrace_children forms the list of my children
 	 * that were stolen by a ptracer.
Index: linux-2.6.9/mm/memory.c
===================================================================
--- linux-2.6.9.orig/mm/memory.c	2004-12-01 13:34:54.000000000 -0800
+++ linux-2.6.9/mm/memory.c	2004-12-01 14:56:04.000000000 -0800
@@ -1417,6 +1417,8 @@
 	return ret;
 }

+int sysctl_max_prealloc_order = 3;
+
 /*
  * We are called with the MM semaphore held.
  */
@@ -1425,59 +1427,92 @@
 		pte_t *page_table, pmd_t *pmd, int write_access,
 		unsigned long addr, pte_t orig_entry)
 {
-	pte_t entry;
-	struct page * page = ZERO_PAGE(addr);
+	unsigned long end_addr;

-	/* Read-only mapping of ZERO_PAGE. */
-	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+	addr &= PAGE_MASK;

-	/* ..except if it's a write access */
-	if (write_access) {
-		/* Allocate our own private page. */
-		pte_unmap(page_table);
+	/* Check if there is a sequential allocation sequence of pages */
+	if (likely(current->anon_fault_next_addr != addr)) {

-		if (unlikely(anon_vma_prepare(vma)))
-			goto no_mem;
-		page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
-		if (!page)
-			goto no_mem;
-		clear_user_highpage(page, addr);
+		/* Single page */
+		current->anon_fault_order = 0;
+		end_addr = addr + PAGE_SIZE;

-		page_table = pte_offset_map(pmd, addr);
+	} else {
+		int order = current->anon_fault_order;

-		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-							 vma->vm_page_prot)),
-				      vma);
-		mark_page_accessed(page);
-	}
+		/*
+		 * Calculate the number of pages to preallocate. The order of preallocations
+		 * increases with each successful prediction
+		 */
+		if (order < sysctl_max_prealloc_order)
+			order++;

-	/* update the entry */
-	if (!ptep_cmpxchg(vma, addr, page_table, orig_entry, entry)) {
-		if (write_access) {
-			pte_unmap(page_table);
-			page_cache_release(page);
-		}
-		goto out;
+		current->anon_fault_order = order;
+
+		end_addr = addr + (PAGE_SIZE << order);
+
+		/* Do not prefault beyond vm limits */
+		if (end_addr > vma->vm_end)
+			end_addr = vma->vm_end;
+
+		/* Stay in pmd */
+		if ((addr & PMD_MASK) != (end_addr & PMD_MASK))
+			end_addr &= PMD_MASK;
 	}
+
 	if (write_access) {
-		/*
-		 * These two functions must come after the cmpxchg
-		 * because if the page is on the LRU then try_to_unmap may come
-		 * in and unmap the pte.
-		 */
-		lru_cache_add_active(page);
-		page_add_anon_rmap(page, vma, addr);
-		current->rss++;
+
+		if (unlikely(anon_vma_prepare(vma)))
+			return VM_FAULT_OOM;
+
+		do {
+			pte_t entry;
+			struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+
+			if (!page)
+				return VM_FAULT_OOM;
+
+			clear_user_highpage(page, addr);
+
+			entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
+							vma->vm_page_prot)),
+					vma);
+			mark_page_accessed(page);
+
+			/* update the entry */
+			if (unlikely(!ptep_cmpxchg(vma, addr, page_table, orig_entry, entry))) {
+				pte_unmap(page_table);
+				page_cache_release(page);
+				break;
+			}
+
+			lru_cache_add_active(page);
+			page_add_anon_rmap(page, vma, addr);
+			current->rss++;
-	}
-	pte_unmap(page_table);
+			pte_unmap(page_table);
+			/* No need to invalidate - it was non-present before */
+			update_mmu_cache(vma, addr, entry);
+			addr += PAGE_SIZE;

-	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, entry);
-out:
+		} while (addr < end_addr && pte_none(orig_entry = *++page_table));
+	} else {
+		pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+
+		/* Read */
+		do {
+			if (unlikely(!ptep_cmpxchg(vma, addr, page_table, orig_entry, entry)))
+				break;
+
+			pte_unmap(page_table);
+			/* No need to invalidate - it was non-present before */
+			update_mmu_cache(vma, addr, entry);
+			addr += PAGE_SIZE;
+
+		} while (addr < end_addr && pte_none(orig_entry = *++page_table));
+	}
+	current->anon_fault_next_addr = addr;
 	return VM_FAULT_MINOR;
-no_mem:
-	return VM_FAULT_OOM;
 }

 /*
Index: linux-2.6.9/kernel/sysctl.c
===================================================================
--- linux-2.6.9.orig/kernel/sysctl.c	2004-12-01 11:54:53.000000000 -0800
+++ linux-2.6.9/kernel/sysctl.c	2004-12-01 14:16:55.000000000 -0800
@@ -56,6 +56,7 @@
 extern int C_A_D;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern int sysctl_max_prealloc_order;
 extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
@@ -816,6 +817,16 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+	{
+		.ctl_name	= VM_MAX_PREFAULT_ORDER,
+		.procname	= "max_prealloc_order",
+		.data		= &sysctl_max_prealloc_order,
+		.maxlen		= sizeof(sysctl_max_prealloc_order),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
Index: linux-2.6.9/include/linux/sysctl.h
===================================================================
--- linux-2.6.9.orig/include/linux/sysctl.h	2004-12-01 11:54:53.000000000 -0800
+++ linux-2.6.9/include/linux/sysctl.h	2004-12-01 13:50:47.000000000 -0800
@@ -168,6 +168,7 @@
 	VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
 	VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+	VM_MAX_PREFAULT_ORDER=29, /* max prefault order during anonymous page faults */
 };
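
For illustration only, not part of the patch: the predictor keyed off
task->anon_fault_next_addr rewards strictly sequential first-touch of
anonymous memory. Below is a minimal userspace sketch of that access
pattern; the mapping size is arbitrary, and it assumes a kernel with this
patch applied and the default sysctl_max_prealloc_order of 3, so a
sequential fault stream ramps up to preallocating 8 pages per fault.

#define _GNU_SOURCE	/* for MAP_ANONYMOUS on older libcs */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = 1024 * page;	/* 4MB with 4K pages */
	size_t off;
	char *buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/*
	 * Sequential first touch: one write per page, in ascending address
	 * order.  Each write fault lands exactly on the task's predicted
	 * anon_fault_next_addr, so the handler above raises its
	 * preallocation order on every fault until it reaches
	 * sysctl_max_prealloc_order (default 3, i.e. 8 pages), and most
	 * pages in the run are populated without taking a fault at all.
	 */
	for (off = 0; off < len; off += page)
		buf[off] = 1;

	munmap(buf, len);
	return EXIT_SUCCESS;
}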
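Usage note: with the patch applied the limit is runtime-tunable, e.g.
"echo 4 > /proc/sys/vm/max_prealloc_order" raises the cap to 16 pages per
fault. The table entry wires up proc_dointvec rather than
proc_dointvec_minmax, so the &zero lower bound in .extra1 is enforced only
on the binary sysctl(2) path via the sysctl_intvec strategy, not for
writes through /proc.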