From: Christoph Lameter

Do not use the page_table_lock in do_anonymous_page.  This will
significantly increase the parallelism in the page fault handler for SMP
systems.  The patch also modifies the definitions of the _mm_counter
functions so that rss and anon_rss become atomic (and will use atomic64_t
if available).
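
To make the counter issue concrete: once the fault path no longer takes
the page_table_lock, two CPUs can account pages into rss concurrently,
and a plain increment can lose updates.  The following is a minimal
userspace sketch of the same pattern, not part of the patch, using C11
atomics and pthreads in place of the kernel's atomic_t/atomic64_t;
fault_path() and the thread/iteration counts are invented for the
demonstration:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long rss;	/* stands in for mm->_rss */

static void *fault_path(void *arg)
{
	/* each "fault" accounts one page, as inc_mm_counter(mm, rss) does */
	for (int i = 0; i < 100000; i++)
		atomic_fetch_add(&rss, 1);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, fault_path, NULL);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	/* always 400000; a plain "rss++" could lose updates and print less */
	printf("rss = %ld\n", atomic_load(&rss));
	return 0;
}

With a plain long and rss++ the threads' read-modify-write cycles can
interleave and the final count comes up short, which is why the
CONFIG_ATOMIC_TABLE_OPS variants of the _mm_counter macros below switch
to atomic operations.
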
Signed-off-by: Christoph Lameter
Signed-off-by: Andrew Morton
---

 include/linux/sched.h |   31 +++++++++++++++++++++++++++++++
 mm/memory.c           |    8 ++------
 2 files changed, 33 insertions(+), 6 deletions(-)

diff -puN include/linux/sched.h~page-fault-patches-no-pagetable-lock-in-do_anon_page include/linux/sched.h
--- devel/include/linux/sched.h~page-fault-patches-no-pagetable-lock-in-do_anon_page	2005-08-06 15:34:30.000000000 -0700
+++ devel-akpm/include/linux/sched.h	2005-08-06 15:34:30.000000000 -0700
@@ -204,12 +204,43 @@ arch_get_unmapped_area_topdown(struct fi
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
+#ifdef CONFIG_ATOMIC_TABLE_OPS
+/*
+ * No spinlock is held during atomic page table operations. The
+ * counters are not protected anymore and must also be
+ * incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else
+/*
+ * This may limit process memory to 2^31 * PAGE_SIZE which may be around 8TB
+ * if using 4KB page size
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif
+#else
+/*
+ * No atomic page table operations. Counters are protected by
+ * the page table lock
+ */
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
 #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
 typedef unsigned long mm_counter_t;
+#endif
 
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
diff -puN mm/memory.c~page-fault-patches-no-pagetable-lock-in-do_anon_page mm/memory.c
--- devel/mm/memory.c~page-fault-patches-no-pagetable-lock-in-do_anon_page	2005-08-06 15:34:30.000000000 -0700
+++ devel-akpm/mm/memory.c	2005-08-06 15:34:30.000000000 -0700
@@ -1788,13 +1788,13 @@ do_anonymous_page(struct mm_struct *mm,
 	entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 						 vma->vm_page_prot)),
 			      vma);
 
-	spin_lock(&mm->page_table_lock);
+	page_table_atomic_start(mm);
 
 	if (!ptep_cmpxchg(mm, addr, page_table, orig_entry, entry)) {
 		pte_unmap(page_table);
 		page_cache_release(page);
 		inc_page_state(cmpxchg_fail_anon_write);
-		goto minor_fault_atomic;
+		goto minor_fault;
 	}
 
 	/*
@@ -1810,10 +1810,6 @@ do_anonymous_page(struct mm_struct *mm,
 	lazy_mmu_prot_update(entry);
 
 minor_fault:
-	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
-
-minor_fault_atomic:
 	page_table_atomic_stop(mm);
 	return VM_FAULT_MINOR;
 
_
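
The key to the lockless path above is ptep_cmpxchg(): the new pte is
installed only if the entry is still the one the fault handler originally
sampled, so when two CPUs fault on the same address exactly one wins and
the others back out through minor_fault (the failures being what
inc_page_state(cmpxchg_fail_anon_write) counts).  A rough userspace
sketch of that pattern, again illustration only and not kernel code:
do_fault() and the use of a long slot for the "pte" are inventions here,
with C11 atomic_compare_exchange_strong() standing in for ptep_cmpxchg().

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long pte;	/* 0 plays the role of the not-yet-present pte */

static void *do_fault(void *arg)
{
	long orig_entry = 0;		/* the entry this thread sampled */
	long entry = (long)arg;		/* the entry it wants to install */

	if (atomic_compare_exchange_strong(&pte, &orig_entry, entry)) {
		/* success: this thread's entry went in */
		printf("thread %ld installed its entry\n", entry);
	} else {
		/* cmpxchg failed: someone else got there first; back out */
		printf("thread %ld backs out (slot already holds %ld)\n",
		       entry, orig_entry);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, do_fault, (void *)(i + 1));
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	printf("final entry = %ld\n", atomic_load(&pte));
	return 0;
}

Regardless of scheduling, only one thread prints the "installed" line;
the rest take the failure branch, just as the losing CPUs in
do_anonymous_page() release the page and return through minor_fault.
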