From: Bill Irwin

Convert mapping->tree_lock to an rwlock.  The use of an rwlock was found to
be a significant improvement over a spinlock in performance testing at
Oracle.

with:

dd if=/dev/zero of=foo bs=1 count=2M  0.80s user 4.15s system 99% cpu 4.961 total
dd if=/dev/zero of=foo bs=1 count=2M  0.73s user 4.26s system 100% cpu 4.987 total
dd if=/dev/zero of=foo bs=1 count=2M  0.79s user 4.25s system 100% cpu 5.034 total

dd if=foo of=/dev/null bs=1           0.80s user 3.12s system 99% cpu 3.928 total
dd if=foo of=/dev/null bs=1           0.77s user 3.15s system 100% cpu 3.914 total
dd if=foo of=/dev/null bs=1           0.92s user 3.02s system 100% cpu 3.935 total

(3.926: 1.87 usecs)

dd if=/dev/zero of=foo bs=1 count=2M  0.85s user 3.92s system 99% cpu 4.780 total
dd if=/dev/zero of=foo bs=1 count=2M  0.78s user 4.02s system 100% cpu 4.789 total
dd if=/dev/zero of=foo bs=1 count=2M  0.82s user 3.94s system 99% cpu 4.763 total
dd if=/dev/zero of=foo bs=1 count=2M  0.71s user 4.10s system 99% cpu 4.810 total

dd if=foo of=/dev/null bs=1           0.76s user 2.68s system 100% cpu 3.438 total
dd if=foo of=/dev/null bs=1           0.74s user 2.72s system 99% cpu 3.465 total
dd if=foo of=/dev/null bs=1           0.67s user 2.82s system 100% cpu 3.489 total
dd if=foo of=/dev/null bs=1           0.70s user 2.62s system 99% cpu 3.326 total

(3.430: 1.635 usecs)

Signed-off-by: Andrew Morton
---

 25-akpm/fs/buffer.c                     |    4 ++--
 25-akpm/fs/inode.c                      |    2 +-
 25-akpm/include/asm-arm/cacheflush.h    |    4 ++--
 25-akpm/include/asm-parisc/cacheflush.h |    4 ++--
 25-akpm/include/linux/fs.h              |    2 +-
 25-akpm/mm/filemap.c                    |   32 ++++++++++++++++----------------
 25-akpm/mm/page-writeback.c             |   28 ++++++++++++++--------------
 25-akpm/mm/readahead.c                  |    8 ++++----
 25-akpm/mm/swap_state.c                 |   18 +++++++++---------
 25-akpm/mm/swapfile.c                   |   12 ++++++------
 25-akpm/mm/truncate.c                   |    8 ++++----
 25-akpm/mm/vmscan.c                     |    8 ++++----
 12 files changed, 65 insertions(+), 65 deletions(-)

diff -puN fs/buffer.c~make-tree_lock-an-rwlock fs/buffer.c
--- 25/fs/buffer.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.427921064 -0800
+++ 25-akpm/fs/buffer.c	2004-12-01 22:21:15.449917720 -0800
@@ -858,7 +858,7 @@ int __set_page_dirty_buffers(struct page
 	spin_unlock(&mapping->private_lock);

 	if (!TestSetPageDirty(page)) {
-		spin_lock_irq(&mapping->tree_lock);
+		write_lock_irq(&mapping->tree_lock);
 		if (page->mapping) {	/* Race with truncate? */
 			if (!mapping->backing_dev_info->memory_backed)
 				inc_page_state(nr_dirty);
@@ -866,7 +866,7 @@ int __set_page_dirty_buffers(struct page
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	}

diff -puN fs/inode.c~make-tree_lock-an-rwlock fs/inode.c
--- 25/fs/inode.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.428920912 -0800
+++ 25-akpm/fs/inode.c	2004-12-01 22:21:15.450917568 -0800
@@ -199,7 +199,7 @@ void inode_init_once(struct inode *inode
 	sema_init(&inode->i_sem, 1);
 	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-	spin_lock_init(&inode->i_data.tree_lock);
+	rwlock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
 	atomic_set(&inode->i_data.truncate_count, 0);
 	INIT_LIST_HEAD(&inode->i_data.private_list);

diff -puN include/asm-arm/cacheflush.h~make-tree_lock-an-rwlock include/asm-arm/cacheflush.h
--- 25/include/asm-arm/cacheflush.h~make-tree_lock-an-rwlock	2004-12-01 22:21:15.430920608 -0800
+++ 25-akpm/include/asm-arm/cacheflush.h	2004-12-01 22:21:15.451917416 -0800
@@ -312,9 +312,9 @@ flush_cache_page(struct vm_area_struct *
 extern void flush_dcache_page(struct page *);

 #define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
+	write_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+	write_unlock_irq(&(mapping)->tree_lock)

 #define flush_icache_user_range(vma,page,addr,len) \
 	flush_dcache_page(page)

diff -puN include/asm-parisc/cacheflush.h~make-tree_lock-an-rwlock include/asm-parisc/cacheflush.h
--- 25/include/asm-parisc/cacheflush.h~make-tree_lock-an-rwlock	2004-12-01 22:21:15.431920456 -0800
+++ 25-akpm/include/asm-parisc/cacheflush.h	2004-12-01 22:21:15.451917416 -0800
@@ -68,9 +68,9 @@ flush_user_icache_range(unsigned long st
 extern void flush_dcache_page(struct page *page);

 #define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
+	write_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+	write_unlock_irq(&(mapping)->tree_lock)

 #define flush_icache_page(vma,page)	do { flush_kernel_dcache_page(page_address(page)); flush_kernel_icache_page(page_address(page)); } while (0)

diff -puN include/linux/fs.h~make-tree_lock-an-rwlock include/linux/fs.h
--- 25/include/linux/fs.h~make-tree_lock-an-rwlock	2004-12-01 22:21:15.433920152 -0800
+++ 25-akpm/include/linux/fs.h	2004-12-01 22:21:15.453917112 -0800
@@ -339,7 +339,7 @@ struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
-	spinlock_t		tree_lock;	/* and spinlock protecting it */
+	rwlock_t		tree_lock;	/* and rwlock protecting it */
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */

diff -puN mm/filemap.c~make-tree_lock-an-rwlock mm/filemap.c
--- 25/mm/filemap.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.435919848 -0800
+++ 25-akpm/mm/filemap.c	2004-12-01 22:21:15.455916808 -0800
@@ -126,9 +126,9 @@ void remove_from_page_cache(struct page
 	if (unlikely(!PageLocked(page)))
 		PAGE_BUG(page);

-	spin_lock_irq(&mapping->tree_lock);
+	write_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
-	spin_unlock_irq(&mapping->tree_lock);
+	write_unlock_irq(&mapping->tree_lock);
 }

 static int sync_page(void *word)
@@ -349,7 +349,7 @@ int add_to_page_cache(struct page *page,
 	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);

 	if (error == 0) {
-		spin_lock_irq(&mapping->tree_lock);
+		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
 			page_cache_get(page);
@@ -359,7 +359,7 @@ int add_to_page_cache(struct page *page,
 			mapping->nrpages++;
 			pagecache_acct(1);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
 	return error;
@@ -472,11 +472,11 @@ struct page * find_get_page(struct addre
 {
 	struct page *page;

-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page)
 		page_cache_get(page);
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }

@@ -489,11 +489,11 @@ struct page *find_trylock_page(struct ad
 {
 	struct page *page;

-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page && TestSetPageLocked(page))
 		page = NULL;
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }

@@ -515,15 +515,15 @@ struct page *find_lock_page(struct addre
 {
 	struct page *page;

-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
repeat:
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page) {
 		page_cache_get(page);
 		if (TestSetPageLocked(page)) {
-			spin_unlock_irq(&mapping->tree_lock);
+			read_unlock_irq(&mapping->tree_lock);
 			lock_page(page);
-			spin_lock_irq(&mapping->tree_lock);
+			read_lock_irq(&mapping->tree_lock);

 			/* Has the page been truncated while we slept? */
 			if (page->mapping != mapping || page->index != offset) {
@@ -533,7 +533,7 @@ repeat:
 			}
 		}
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }

@@ -608,12 +608,12 @@ unsigned find_get_pages(struct address_s
 	unsigned int i;
 	unsigned int ret;

-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, start, nr_pages);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);
 	return ret;
 }

@@ -627,14 +627,14 @@ unsigned find_get_pages_tag(struct addre
 	unsigned int i;
 	unsigned int ret;

-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
 				(void **)pages, *index, nr_pages, tag);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
 	if (ret)
 		*index = pages[ret - 1]->index + 1;
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);
 	return ret;
 }

diff -puN mm/page-writeback.c~make-tree_lock-an-rwlock mm/page-writeback.c
--- 25/mm/page-writeback.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.437919544 -0800
+++ 25-akpm/mm/page-writeback.c	2004-12-01 22:21:15.456916656 -0800
@@ -588,7 +588,7 @@ int __set_page_dirty_nobuffers(struct pa
 		struct address_space *mapping2;

 		if (mapping) {
-			spin_lock_irq(&mapping->tree_lock);
+			write_lock_irq(&mapping->tree_lock);
 			mapping2 = page_mapping(page);
 			if (mapping2) { /* Race with truncate? */
 				BUG_ON(mapping2 != mapping);
@@ -597,7 +597,7 @@ int __set_page_dirty_nobuffers(struct pa
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
 			}
-			spin_unlock_irq(&mapping->tree_lock);
+			write_unlock_irq(&mapping->tree_lock);
 			if (mapping->host) {
 				/* !PageAnon && !swapper_space */
 				__mark_inode_dirty(mapping->host,
@@ -672,17 +672,17 @@ int test_clear_page_dirty(struct page *p
 	unsigned long flags;

 	if (mapping) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		if (TestClearPageDirty(page)) {
 			radix_tree_tag_clear(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
-			spin_unlock_irqrestore(&mapping->tree_lock, flags);
+			write_unlock_irqrestore(&mapping->tree_lock, flags);
 			if (!mapping->backing_dev_info->memory_backed)
 				dec_page_state(nr_dirty);
 			return 1;
 		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 		return 0;
 	}
 	return TestClearPageDirty(page);
@@ -729,15 +729,15 @@ int __clear_page_dirty(struct page *page
 	if (mapping) {
 		unsigned long flags;

-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		if (TestClearPageDirty(page)) {
 			radix_tree_tag_clear(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
-			spin_unlock_irqrestore(&mapping->tree_lock, flags);
+			write_unlock_irqrestore(&mapping->tree_lock, flags);
 			return 1;
 		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 		return 0;
 	}
 	return TestClearPageDirty(page);
@@ -751,13 +751,13 @@ int test_clear_page_writeback(struct pag
 	if (mapping) {
 		unsigned long flags;

-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret)
 			radix_tree_tag_clear(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_WRITEBACK);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
@@ -772,7 +772,7 @@ int test_set_page_writeback(struct page
 	if (mapping) {
 		unsigned long flags;

-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret)
 			radix_tree_tag_set(&mapping->page_tree,
@@ -782,7 +782,7 @@ int test_set_page_writeback(struct page
 			radix_tree_tag_clear(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
@@ -800,9 +800,9 @@ int mapping_tagged(struct address_space
 	unsigned long flags;
 	int ret;

-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	read_lock_irqsave(&mapping->tree_lock, flags);
 	ret = radix_tree_tagged(&mapping->page_tree, tag);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	read_unlock_irqrestore(&mapping->tree_lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);

diff -puN mm/readahead.c~make-tree_lock-an-rwlock mm/readahead.c
--- 25/mm/readahead.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.438919392 -0800
+++ 25-akpm/mm/readahead.c	2004-12-01 22:21:15.457916504 -0800
@@ -273,7 +273,7 @@ __do_page_cache_readahead(struct address
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
-	spin_lock_irq(&mapping->tree_lock);
+	read_lock_irq(&mapping->tree_lock);
 	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
 		unsigned long page_offset = offset + page_idx;

@@ -284,16 +284,16 @@ __do_page_cache_readahead(struct address
 		if (page)
 			continue;

-		spin_unlock_irq(&mapping->tree_lock);
+		read_unlock_irq(&mapping->tree_lock);
 		page = page_cache_alloc_cold(mapping, page_offset);
-		spin_lock_irq(&mapping->tree_lock);
+		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = page_offset;
 		list_add(&page->lru, &page_pool);
 		ret++;
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	read_unlock_irq(&mapping->tree_lock);

 	/*
 	 * Now start the IO.  We ignore I/O errors - if the page is not

diff -puN mm/swapfile.c~make-tree_lock-an-rwlock mm/swapfile.c
--- 25/mm/swapfile.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.440919088 -0800
+++ 25-akpm/mm/swapfile.c	2004-12-01 22:21:15.459916200 -0800
@@ -291,10 +291,10 @@ static int exclusive_swap_page(struct pa
 		/* Is the only swap cache user the cache itself? */
 		if (p->swap_map[swp_offset(entry)] == 1) {
 			/* Recheck the page count with the swapcache lock held.. */
-			spin_lock_irq(&swapper_space.tree_lock);
+			write_lock_irq(&swapper_space.tree_lock);
 			if (page_count(page) == 2)
 				retval = 1;
-			spin_unlock_irq(&swapper_space.tree_lock);
+			write_unlock_irq(&swapper_space.tree_lock);
 		}
 		swap_info_put(p);
 	}
@@ -362,13 +362,13 @@ int remove_exclusive_swap_page(struct pa
 	retval = 0;
 	if (p->swap_map[swp_offset(entry)] == 1) {
 		/* Recheck the page count with the swapcache lock held.. */
-		spin_lock_irq(&swapper_space.tree_lock);
+		write_lock_irq(&swapper_space.tree_lock);
 		if ((page_count(page) == 2) && !PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
 		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		write_unlock_irq(&swapper_space.tree_lock);
 	}
 	swap_info_put(p);

@@ -392,12 +392,12 @@ void free_swap_and_cache(swp_entry_t ent
 	p = swap_info_get(entry);
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
-			spin_lock_irq(&swapper_space.tree_lock);
+			read_lock_irq(&swapper_space.tree_lock);
 			page = radix_tree_lookup(&swapper_space.page_tree,
 						entry.val);
 			if (page && TestSetPageLocked(page))
 				page = NULL;
-			spin_unlock_irq(&swapper_space.tree_lock);
+			read_unlock_irq(&swapper_space.tree_lock);
 		}
 		swap_info_put(p);
 	}

diff -puN mm/swap_state.c~make-tree_lock-an-rwlock mm/swap_state.c
--- 25/mm/swap_state.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.441918936 -0800
+++ 25-akpm/mm/swap_state.c	2004-12-01 22:21:15.460916048 -0800
@@ -35,7 +35,7 @@ static struct backing_dev_info swap_back

 struct address_space swapper_space = {
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= SPIN_LOCK_UNLOCKED,
+	.tree_lock	= RW_LOCK_UNLOCKED,
 	.a_ops		= &swap_aops,
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
@@ -74,7 +74,7 @@ static int __add_to_swap_cache(struct pa
 	BUG_ON(PagePrivate(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
-		spin_lock_irq(&swapper_space.tree_lock);
+		write_lock_irq(&swapper_space.tree_lock);
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
 		if (!error) {
@@ -85,7 +85,7 @@ static int __add_to_swap_cache(struct pa
 			total_swapcache_pages++;
 			pagecache_acct(1);
 		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		write_unlock_irq(&swapper_space.tree_lock);
 		radix_tree_preload_end();
 	}
 	return error;
@@ -212,9 +212,9 @@ void delete_from_swap_cache(struct page

 	entry.val = page->private;

-	spin_lock_irq(&swapper_space.tree_lock);
+	write_lock_irq(&swapper_space.tree_lock);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&swapper_space.tree_lock);
+	write_unlock_irq(&swapper_space.tree_lock);

 	swap_free(entry);
 	page_cache_release(page);
@@ -313,13 +313,13 @@ struct page * lookup_swap_cache(swp_entr
 {
 	struct page *page;

-	spin_lock_irq(&swapper_space.tree_lock);
+	read_lock_irq(&swapper_space.tree_lock);
 	page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
 	if (page) {
 		page_cache_get(page);
 		INC_CACHE_INFO(find_success);
 	}
-	spin_unlock_irq(&swapper_space.tree_lock);
+	read_unlock_irq(&swapper_space.tree_lock);
 	INC_CACHE_INFO(find_total);
 	return page;
 }
@@ -342,12 +342,12 @@ struct page *read_swap_cache_async(swp_e
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		spin_lock_irq(&swapper_space.tree_lock);
+		read_lock_irq(&swapper_space.tree_lock);
 		found_page = radix_tree_lookup(&swapper_space.page_tree,
 						entry.val);
 		if (found_page)
 			page_cache_get(found_page);
-		spin_unlock_irq(&swapper_space.tree_lock);
+		read_unlock_irq(&swapper_space.tree_lock);
 		if (found_page)
 			break;

diff -puN mm/truncate.c~make-tree_lock-an-rwlock mm/truncate.c
--- 25/mm/truncate.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.442918784 -0800
+++ 25-akpm/mm/truncate.c	2004-12-01 22:21:15.460916048 -0800
@@ -74,19 +74,19 @@ invalidate_complete_page(struct address_
 	if (PagePrivate(page) && !try_to_release_page(page, 0))
 		return 0;

-	spin_lock_irq(&mapping->tree_lock);
+	write_lock_irq(&mapping->tree_lock);
 	if (PageDirty(page)) {
-		spin_unlock_irq(&mapping->tree_lock);
+		write_unlock_irq(&mapping->tree_lock);
 		return 0;
 	}

 	BUG_ON(PagePrivate(page));
 	if (page_count(page) != 2) {
-		spin_unlock_irq(&mapping->tree_lock);
+		write_unlock_irq(&mapping->tree_lock);
 		return 0;
 	}
 	__remove_from_page_cache(page);
-	spin_unlock_irq(&mapping->tree_lock);
+	write_unlock_irq(&mapping->tree_lock);
 	ClearPageUptodate(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;

diff -puN mm/vmscan.c~make-tree_lock-an-rwlock mm/vmscan.c
--- 25/mm/vmscan.c~make-tree_lock-an-rwlock	2004-12-01 22:21:15.444918480 -0800
+++ 25-akpm/mm/vmscan.c	2004-12-01 22:21:15.461915896 -0800
@@ -474,7 +474,7 @@ static int shrink_list(struct list_head
 		if (!mapping)
 			goto keep_locked;	/* truncate got there first */

-		spin_lock_irq(&mapping->tree_lock);
+		write_lock_irq(&mapping->tree_lock);

 		/*
 		 * The non-racy check for busy page.  It is critical to check
@@ -482,7 +482,7 @@ static int shrink_list(struct list_head
 		 * not in use by anybody.	(pagecache + us == 2)
 		 */
 		if (page_count(page) != 2 || PageDirty(page)) {
-			spin_unlock_irq(&mapping->tree_lock);
+			write_unlock_irq(&mapping->tree_lock);
 			goto keep_locked;
 		}

@@ -490,7 +490,7 @@ static int shrink_list(struct list_head
 		if (PageSwapCache(page)) {
 			swp_entry_t swap = { .val = page->private };
 			__delete_from_swap_cache(page);
-			spin_unlock_irq(&mapping->tree_lock);
+			write_unlock_irq(&mapping->tree_lock);
 			swap_free(swap);
 			__put_page(page);	/* The pagecache ref */
 			goto free_it;
@@ -498,7 +498,7 @@ static int shrink_list(struct list_head
 #endif /* CONFIG_SWAP */

 		__remove_from_page_cache(page);
-		spin_unlock_irq(&mapping->tree_lock);
+		write_unlock_irq(&mapping->tree_lock);
 		__put_page(page);
 free_it:
_
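
For reference, the locking rule the patch above establishes is: radix tree
lookups take mapping->tree_lock shared, while insertions, removals and tag
updates keep taking it exclusive, and the irq-disabling lock variants are
retained unchanged.  The sketch below is illustrative only: the function
names example_find_page() and example_remove_page() are made up for this
note, and the bodies simply mirror find_get_page() and
remove_from_page_cache() from the patched mm/filemap.c.

	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/pagemap.h>
	#include <linux/radix-tree.h>

	/* Illustrative sketch, not part of the patch. */

	/* Read side: a pure lookup only needs the lock shared. */
	static struct page *example_find_page(struct address_space *mapping,
					      unsigned long offset)
	{
		struct page *page;

		read_lock_irq(&mapping->tree_lock);
		page = radix_tree_lookup(&mapping->page_tree, offset);
		if (page)
			page_cache_get(page);	/* pin before dropping the lock */
		read_unlock_irq(&mapping->tree_lock);
		return page;
	}

	/* Write side: anything that changes the tree takes it exclusive. */
	static void example_remove_page(struct address_space *mapping,
					struct page *page)
	{
		write_lock_irq(&mapping->tree_lock);
		__remove_from_page_cache(page);
		write_unlock_irq(&mapping->tree_lock);
	}

With the rwlock, concurrent lookups against the same mapping no longer
serialise against each other; only updates exclude other users of the tree.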