From: Bill Irwin

Convert mapping->tree_lock to an rwlock.  The use of an rwlock was found to
be a significant improvement over a spinlock in performance testing at
Oracle, with:

spinlock:

dd if=/dev/zero of=foo bs=1 count=2M  0.80s user 4.15s system 99% cpu 4.961 total
dd if=/dev/zero of=foo bs=1 count=2M  0.73s user 4.26s system 100% cpu 4.987 total
dd if=/dev/zero of=foo bs=1 count=2M  0.79s user 4.25s system 100% cpu 5.034 total

dd if=foo of=/dev/null bs=1  0.80s user 3.12s system 99% cpu 3.928 total
dd if=foo of=/dev/null bs=1  0.77s user 3.15s system 100% cpu 3.914 total
dd if=foo of=/dev/null bs=1  0.92s user 3.02s system 100% cpu 3.935 total

(3.926: 1.87 usecs)

rwlock:

dd if=/dev/zero of=foo bs=1 count=2M  0.85s user 3.92s system 99% cpu 4.780 total
dd if=/dev/zero of=foo bs=1 count=2M  0.78s user 4.02s system 100% cpu 4.789 total
dd if=/dev/zero of=foo bs=1 count=2M  0.82s user 3.94s system 99% cpu 4.763 total
dd if=/dev/zero of=foo bs=1 count=2M  0.71s user 4.10s system 99% cpu 4.810 total

dd if=foo of=/dev/null bs=1  0.76s user 2.68s system 100% cpu 3.438 total
dd if=foo of=/dev/null bs=1  0.74s user 2.72s system 99% cpu 3.465 total
dd if=foo of=/dev/null bs=1  0.67s user 2.82s system 100% cpu 3.489 total
dd if=foo of=/dev/null bs=1  0.70s user 2.62s system 99% cpu 3.326 total

(3.430: 1.635 usecs)
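As an aside (not part of the patch itself), the discipline the conversion
establishes is: lookup paths such as find_get_page() take tree_lock shared,
while paths that insert or remove pages take it exclusively.  Below is a
minimal userspace sketch of that pattern, using POSIX rwlocks as a stand-in
for the kernel's rwlock_t and a plain array standing in for the page-cache
radix tree; all names in it are illustrative only.

/* build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

#define NR_SLOTS 64

static void *slots[NR_SLOTS];           /* stand-in for mapping->page_tree */
static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

/* lookup side: analogous to find_get_page(); readers may run concurrently */
static void *lookup(unsigned long index)
{
        void *p;

        pthread_rwlock_rdlock(&tree_lock);      /* read_lock_irq() in the patch */
        p = slots[index % NR_SLOTS];
        pthread_rwlock_unlock(&tree_lock);      /* read_unlock_irq() */
        return p;
}

/* update side: analogous to add_to_page_cache(); writers are exclusive */
static int insert(unsigned long index, void *p)
{
        int err = 0;

        pthread_rwlock_wrlock(&tree_lock);      /* write_lock_irq() in the patch */
        if (slots[index % NR_SLOTS])
                err = -1;                       /* slot already populated */
        else
                slots[index % NR_SLOTS] = p;
        pthread_rwlock_unlock(&tree_lock);      /* write_unlock_irq() */
        return err;
}

int main(void)
{
        static int v = 42;

        insert(3, &v);
        printf("slot 3 -> %p (inserted %p)\n", lookup(3), (void *)&v);
        return 0;
}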
Signed-off-by: Andrew Morton
---

 25-akpm/fs/buffer.c                     |    4 ++--
 25-akpm/fs/inode.c                      |    2 +-
 25-akpm/include/asm-arm/cacheflush.h    |    4 ++--
 25-akpm/include/asm-parisc/cacheflush.h |    4 ++--
 25-akpm/include/linux/fs.h              |    2 +-
 25-akpm/mm/filemap.c                    |   32 ++++++++++++++++----------------
 25-akpm/mm/page-writeback.c             |   28 ++++++++++++++--------------
 25-akpm/mm/readahead.c                  |    8 ++++----
 25-akpm/mm/swap_state.c                 |   18 +++++++++---------
 25-akpm/mm/swapfile.c                   |   12 ++++++------
 25-akpm/mm/truncate.c                   |    6 +++---
 25-akpm/mm/vmscan.c                     |    8 ++++----
 12 files changed, 64 insertions(+), 64 deletions(-)

diff -puN fs/buffer.c~make-tree_lock-an-rwlock fs/buffer.c
--- 25/fs/buffer.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.896426376 -0700
+++ 25-akpm/fs/buffer.c 2004-07-05 16:07:04.918423032 -0700
@@ -947,7 +947,7 @@ int __set_page_dirty_buffers(struct page
         spin_unlock(&mapping->private_lock);
 
         if (!TestSetPageDirty(page)) {
-                spin_lock_irq(&mapping->tree_lock);
+                write_lock_irq(&mapping->tree_lock);
                 if (page->mapping) {    /* Race with truncate? */
                         if (!mapping->backing_dev_info->memory_backed)
                                 inc_page_state(nr_dirty);
@@ -955,7 +955,7 @@ int __set_page_dirty_buffers(struct page
                                         page_index(page),
                                         PAGECACHE_TAG_DIRTY);
                 }
-                spin_unlock_irq(&mapping->tree_lock);
+                write_unlock_irq(&mapping->tree_lock);
                 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
         }
 
diff -puN fs/inode.c~make-tree_lock-an-rwlock fs/inode.c
--- 25/fs/inode.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.898426072 -0700
+++ 25-akpm/fs/inode.c 2004-07-05 16:07:04.919422880 -0700
@@ -196,7 +196,7 @@ void inode_init_once(struct inode *inode
         sema_init(&inode->i_sem, 1);
         init_rwsem(&inode->i_alloc_sem);
         INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-        spin_lock_init(&inode->i_data.tree_lock);
+        rwlock_init(&inode->i_data.tree_lock);
         spin_lock_init(&inode->i_data.i_mmap_lock);
         atomic_set(&inode->i_data.truncate_count, 0);
         INIT_LIST_HEAD(&inode->i_data.private_list);

diff -puN include/asm-arm/cacheflush.h~make-tree_lock-an-rwlock include/asm-arm/cacheflush.h
--- 25/include/asm-arm/cacheflush.h~make-tree_lock-an-rwlock 2004-07-05 16:07:04.899425920 -0700
+++ 25-akpm/include/asm-arm/cacheflush.h 2004-07-05 16:07:04.920422728 -0700
@@ -294,9 +294,9 @@ flush_cache_page(struct vm_area_struct *
 extern void flush_dcache_page(struct page *);
 
 #define flush_dcache_mmap_lock(mapping) \
-        spin_lock_irq(&(mapping)->tree_lock)
+        write_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-        spin_unlock_irq(&(mapping)->tree_lock)
+        write_unlock_irq(&(mapping)->tree_lock)
 
 #define flush_icache_user_range(vma,page,addr,len) \
         flush_dcache_page(page)

diff -puN include/asm-parisc/cacheflush.h~make-tree_lock-an-rwlock include/asm-parisc/cacheflush.h
--- 25/include/asm-parisc/cacheflush.h~make-tree_lock-an-rwlock 2004-07-05 16:07:04.901425616 -0700
+++ 25-akpm/include/asm-parisc/cacheflush.h 2004-07-05 16:07:04.920422728 -0700
@@ -79,9 +79,9 @@ static inline void flush_dcache_page(str
 }
 
 #define flush_dcache_mmap_lock(mapping) \
-        spin_lock_irq(&(mapping)->tree_lock)
+        write_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-        spin_unlock_irq(&(mapping)->tree_lock)
+        write_unlock_irq(&(mapping)->tree_lock)
 
 #define flush_icache_page(vma,page) do { flush_kernel_dcache_page(page_address(page)); flush_kernel_icache_page(page_address(page)); } while (0)

diff -puN include/linux/fs.h~make-tree_lock-an-rwlock include/linux/fs.h
--- 25/include/linux/fs.h~make-tree_lock-an-rwlock 2004-07-05 16:07:04.903425312 -0700
+++ 25-akpm/include/linux/fs.h 2004-07-05 16:07:04.922422424 -0700
@@ -328,7 +328,7 @@ struct backing_dev_info;
 struct address_space {
         struct inode            *host;          /* owner: inode, block_device */
         struct radix_tree_root  page_tree;      /* radix tree of all pages */
-        spinlock_t              tree_lock;      /* and spinlock protecting it */
+        rwlock_t                tree_lock;      /* and rwlock protecting it */
         unsigned long           nrpages;        /* number of total pages */
         pgoff_t                 writeback_index;/* writeback starts here */
         struct address_space_operations *a_ops; /* methods */

diff -puN mm/filemap.c~make-tree_lock-an-rwlock mm/filemap.c
--- 25/mm/filemap.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.905425008 -0700
+++ 25-akpm/mm/filemap.c 2004-07-05 16:07:04.924422120 -0700
@@ -122,9 +122,9 @@ void remove_from_page_cache(struct page
         if (unlikely(!PageLocked(page)))
                 PAGE_BUG(page);
 
-        spin_lock_irq(&mapping->tree_lock);
+        write_lock_irq(&mapping->tree_lock);
         __remove_from_page_cache(page);
-        spin_unlock_irq(&mapping->tree_lock);
+        write_unlock_irq(&mapping->tree_lock);
 }
 
 static inline int sync_page(struct page *page)
@@ -261,7 +261,7 @@ int add_to_page_cache(struct page *page,
         int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
         if (error == 0) {
-                spin_lock_irq(&mapping->tree_lock);
+                write_lock_irq(&mapping->tree_lock);
                 error = radix_tree_insert(&mapping->page_tree, offset, page);
                 if (!error) {
                         page_cache_get(page);
@@ -271,7 +271,7 @@ int add_to_page_cache(struct page *page,
                         mapping->nrpages++;
                         pagecache_acct(1);
                 }
-                spin_unlock_irq(&mapping->tree_lock);
+                write_unlock_irq(&mapping->tree_lock);
                 radix_tree_preload_end();
         }
         return error;
@@ -444,11 +444,11 @@ struct page * find_get_page(struct addre
          * We scan the hash list read-only. Addition to and removal from
          * the hash-list needs a held write-lock.
          */
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
         page = radix_tree_lookup(&mapping->page_tree, offset);
         if (page)
                 page_cache_get(page);
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
         return page;
 }
 
@@ -461,11 +461,11 @@ struct page *find_trylock_page(struct ad
 {
         struct page *page;
 
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
         page = radix_tree_lookup(&mapping->page_tree, offset);
         if (page && TestSetPageLocked(page))
                 page = NULL;
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
         return page;
 }
 
@@ -487,15 +487,15 @@ struct page *find_lock_page(struct addre
 {
         struct page *page;
 
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
 repeat:
         page = radix_tree_lookup(&mapping->page_tree, offset);
         if (page) {
                 page_cache_get(page);
                 if (TestSetPageLocked(page)) {
-                        spin_unlock_irq(&mapping->tree_lock);
+                        read_unlock_irq(&mapping->tree_lock);
                         lock_page(page);
-                        spin_lock_irq(&mapping->tree_lock);
+                        read_lock_irq(&mapping->tree_lock);
 
                         /* Has the page been truncated while we slept? */
                         if (page->mapping != mapping || page->index != offset) {
@@ -505,7 +505,7 @@ repeat:
                         }
                 }
         }
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
         return page;
 }
 
@@ -579,12 +579,12 @@ unsigned find_get_pages(struct address_s
         unsigned int i;
         unsigned int ret;
 
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
         ret = radix_tree_gang_lookup(&mapping->page_tree, (void **)pages,
                                 start, nr_pages);
         for (i = 0; i < ret; i++)
                 page_cache_get(pages[i]);
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
         return ret;
 }
 
@@ -598,14 +598,14 @@ unsigned find_get_pages_tag(struct addre
         unsigned int i;
         unsigned int ret;
 
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
         ret = radix_tree_gang_lookup_tag(&mapping->page_tree, (void **)pages,
                                 *index, nr_pages, tag);
         for (i = 0; i < ret; i++)
                 page_cache_get(pages[i]);
         if (ret)
                 *index = pages[ret - 1]->index + 1;
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
         return ret;
 }
 
diff -puN mm/page-writeback.c~make-tree_lock-an-rwlock mm/page-writeback.c
--- 25/mm/page-writeback.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.906424856 -0700
+++ 25-akpm/mm/page-writeback.c 2004-07-05 16:07:04.925421968 -0700
@@ -582,7 +582,7 @@ int __set_page_dirty_nobuffers(struct pa
                 struct address_space *mapping = page_mapping(page);
 
                 if (mapping) {
-                        spin_lock_irq(&mapping->tree_lock);
+                        write_lock_irq(&mapping->tree_lock);
                         mapping = page_mapping(page);
                         if (page_mapping(page)) {       /* Race with truncate? */
                                 BUG_ON(page_mapping(page) != mapping);
@@ -591,7 +591,7 @@ int __set_page_dirty_nobuffers(struct pa
                                 radix_tree_tag_set(&mapping->page_tree,
                                         page_index(page), PAGECACHE_TAG_DIRTY);
                         }
-                        spin_unlock_irq(&mapping->tree_lock);
+                        write_unlock_irq(&mapping->tree_lock);
                         if (mapping->host) {
                                 /* !PageAnon && !swapper_space */
                                 __mark_inode_dirty(mapping->host,
@@ -666,17 +666,17 @@ int test_clear_page_dirty(struct page *p
         unsigned long flags;
 
         if (mapping) {
-                spin_lock_irqsave(&mapping->tree_lock, flags);
+                write_lock_irqsave(&mapping->tree_lock, flags);
                 if (TestClearPageDirty(page)) {
                         radix_tree_tag_clear(&mapping->page_tree,
                                                 page_index(page),
                                                 PAGECACHE_TAG_DIRTY);
-                        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                        write_unlock_irqrestore(&mapping->tree_lock, flags);
                         if (!mapping->backing_dev_info->memory_backed)
                                 dec_page_state(nr_dirty);
                         return 1;
                 }
-                spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                write_unlock_irqrestore(&mapping->tree_lock, flags);
                 return 0;
         }
         return TestClearPageDirty(page);
@@ -723,15 +723,15 @@ int __clear_page_dirty(struct page *page
         if (mapping) {
                 unsigned long flags;
 
-                spin_lock_irqsave(&mapping->tree_lock, flags);
+                write_lock_irqsave(&mapping->tree_lock, flags);
                 if (TestClearPageDirty(page)) {
                         radix_tree_tag_clear(&mapping->page_tree,
                                                 page_index(page),
                                                 PAGECACHE_TAG_DIRTY);
-                        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                        write_unlock_irqrestore(&mapping->tree_lock, flags);
                         return 1;
                 }
-                spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                write_unlock_irqrestore(&mapping->tree_lock, flags);
                 return 0;
         }
         return TestClearPageDirty(page);
@@ -745,13 +745,13 @@ int test_clear_page_writeback(struct pag
         if (mapping) {
                 unsigned long flags;
 
-                spin_lock_irqsave(&mapping->tree_lock, flags);
+                write_lock_irqsave(&mapping->tree_lock, flags);
                 ret = TestClearPageWriteback(page);
                 if (ret)
                         radix_tree_tag_clear(&mapping->page_tree,
                                                 page_index(page),
                                                 PAGECACHE_TAG_WRITEBACK);
-                spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                write_unlock_irqrestore(&mapping->tree_lock, flags);
         } else {
                 ret = TestClearPageWriteback(page);
         }
@@ -766,7 +766,7 @@ int test_set_page_writeback(struct page
         if (mapping) {
                 unsigned long flags;
 
-                spin_lock_irqsave(&mapping->tree_lock, flags);
+                write_lock_irqsave(&mapping->tree_lock, flags);
                 ret = TestSetPageWriteback(page);
                 if (!ret)
                         radix_tree_tag_set(&mapping->page_tree,
@@ -776,7 +776,7 @@ int test_set_page_writeback(struct page
                         radix_tree_tag_clear(&mapping->page_tree,
                                                 page_index(page),
                                                 PAGECACHE_TAG_DIRTY);
-                spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                write_unlock_irqrestore(&mapping->tree_lock, flags);
         } else {
                 ret = TestSetPageWriteback(page);
         }
@@ -794,9 +794,9 @@ int mapping_tagged(struct address_space
         unsigned long flags;
         int ret;
 
-        spin_lock_irqsave(&mapping->tree_lock, flags);
+        read_lock_irqsave(&mapping->tree_lock, flags);
         ret = radix_tree_tagged(&mapping->page_tree, tag);
-        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+        read_unlock_irqrestore(&mapping->tree_lock, flags);
         return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);

diff -puN mm/readahead.c~make-tree_lock-an-rwlock mm/readahead.c
--- 25/mm/readahead.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.907424704 -0700
+++ 25-akpm/mm/readahead.c 2004-07-05 16:07:04.926421816 -0700
@@ -234,7 +234,7 @@ __do_page_cache_readahead(struct address
         /*
          * Preallocate as many pages as we will need.
          */
-        spin_lock_irq(&mapping->tree_lock);
+        read_lock_irq(&mapping->tree_lock);
         for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
                 unsigned long page_offset = offset + page_idx;
 
@@ -245,16 +245,16 @@ __do_page_cache_readahead(struct address
                 if (page)
                         continue;
 
-                spin_unlock_irq(&mapping->tree_lock);
+                read_unlock_irq(&mapping->tree_lock);
                 page = page_cache_alloc_cold(mapping);
-                spin_lock_irq(&mapping->tree_lock);
+                read_lock_irq(&mapping->tree_lock);
                 if (!page)
                         break;
                 page->index = page_offset;
                 list_add(&page->lru, &page_pool);
                 ret++;
         }
-        spin_unlock_irq(&mapping->tree_lock);
+        read_unlock_irq(&mapping->tree_lock);
 
         /*
          * Now start the IO.  We ignore I/O errors - if the page is not
 
diff -puN mm/swapfile.c~make-tree_lock-an-rwlock mm/swapfile.c
--- 25/mm/swapfile.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.909424400 -0700
+++ 25-akpm/mm/swapfile.c 2004-07-05 16:07:04.927421664 -0700
@@ -290,10 +290,10 @@ static int exclusive_swap_page(struct pa
                 /* Is the only swap cache user the cache itself? */
                 if (p->swap_map[swp_offset(entry)] == 1) {
                         /* Recheck the page count with the swapcache lock held.. */
-                        spin_lock_irq(&swapper_space.tree_lock);
+                        write_lock_irq(&swapper_space.tree_lock);
                         if (page_count(page) == 2)
                                 retval = 1;
-                        spin_unlock_irq(&swapper_space.tree_lock);
+                        write_unlock_irq(&swapper_space.tree_lock);
                 }
                 swap_info_put(p);
         }
@@ -361,13 +361,13 @@ int remove_exclusive_swap_page(struct pa
         retval = 0;
         if (p->swap_map[swp_offset(entry)] == 1) {
                 /* Recheck the page count with the swapcache lock held.. */
-                spin_lock_irq(&swapper_space.tree_lock);
+                write_lock_irq(&swapper_space.tree_lock);
                 if ((page_count(page) == 2) && !PageWriteback(page)) {
                         __delete_from_swap_cache(page);
                         SetPageDirty(page);
                         retval = 1;
                 }
-                spin_unlock_irq(&swapper_space.tree_lock);
+                write_unlock_irq(&swapper_space.tree_lock);
         }
         swap_info_put(p);
 
@@ -391,12 +391,12 @@ void free_swap_and_cache(swp_entry_t ent
         p = swap_info_get(entry);
         if (p) {
                 if (swap_entry_free(p, swp_offset(entry)) == 1) {
-                        spin_lock_irq(&swapper_space.tree_lock);
+                        read_lock_irq(&swapper_space.tree_lock);
                         page = radix_tree_lookup(&swapper_space.page_tree,
                                                 entry.val);
                         if (page && TestSetPageLocked(page))
                                 page = NULL;
-                        spin_unlock_irq(&swapper_space.tree_lock);
+                        read_unlock_irq(&swapper_space.tree_lock);
                 }
                 swap_info_put(p);
         }

diff -puN mm/swap_state.c~make-tree_lock-an-rwlock mm/swap_state.c
--- 25/mm/swap_state.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.910424248 -0700
+++ 25-akpm/mm/swap_state.c 2004-07-05 16:07:04.928421512 -0700
@@ -35,7 +35,7 @@ static struct backing_dev_info swap_back
 
 struct address_space swapper_space = {
         .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC),
-        .tree_lock      = SPIN_LOCK_UNLOCKED,
+        .tree_lock      = RW_LOCK_UNLOCKED,
         .a_ops          = &swap_aops,
         .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
         .backing_dev_info = &swap_backing_dev_info,
@@ -74,7 +74,7 @@ static int __add_to_swap_cache(struct pa
         BUG_ON(PagePrivate(page));
         error = radix_tree_preload(gfp_mask);
         if (!error) {
-                spin_lock_irq(&swapper_space.tree_lock);
+                write_lock_irq(&swapper_space.tree_lock);
                 error = radix_tree_insert(&swapper_space.page_tree,
                                                 entry.val, page);
                 if (!error) {
@@ -85,7 +85,7 @@ static int __add_to_swap_cache(struct pa
                         total_swapcache_pages++;
                         pagecache_acct(1);
                 }
-                spin_unlock_irq(&swapper_space.tree_lock);
+                write_unlock_irq(&swapper_space.tree_lock);
                 radix_tree_preload_end();
         }
         return error;
@@ -212,9 +212,9 @@ void delete_from_swap_cache(struct page
 
         entry.val = page->private;
 
-        spin_lock_irq(&swapper_space.tree_lock);
+        write_lock_irq(&swapper_space.tree_lock);
         __delete_from_swap_cache(page);
-        spin_unlock_irq(&swapper_space.tree_lock);
+        write_unlock_irq(&swapper_space.tree_lock);
 
         swap_free(entry);
         page_cache_release(page);
@@ -313,13 +313,13 @@ struct page * lookup_swap_cache(swp_entr
 {
         struct page *page;
 
-        spin_lock_irq(&swapper_space.tree_lock);
+        read_lock_irq(&swapper_space.tree_lock);
         page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
         if (page) {
                 page_cache_get(page);
                 INC_CACHE_INFO(find_success);
         }
-        spin_unlock_irq(&swapper_space.tree_lock);
+        read_unlock_irq(&swapper_space.tree_lock);
         INC_CACHE_INFO(find_total);
         return page;
 }
@@ -342,12 +342,12 @@ struct page *read_swap_cache_async(swp_e
                  * called after lookup_swap_cache() failed, re-calling
                  * that would confuse statistics.
                  */
-                spin_lock_irq(&swapper_space.tree_lock);
+                read_lock_irq(&swapper_space.tree_lock);
                 found_page = radix_tree_lookup(&swapper_space.page_tree,
                                                 entry.val);
                 if (found_page)
                         page_cache_get(found_page);
-                spin_unlock_irq(&swapper_space.tree_lock);
+                read_unlock_irq(&swapper_space.tree_lock);
                 if (found_page)
                         break;

diff -puN mm/truncate.c~make-tree_lock-an-rwlock mm/truncate.c
--- 25/mm/truncate.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.911424096 -0700
+++ 25-akpm/mm/truncate.c 2004-07-05 16:07:04.929421360 -0700
@@ -74,13 +74,13 @@ invalidate_complete_page(struct address_
         if (PagePrivate(page) && !try_to_release_page(page, 0))
                 return 0;
 
-        spin_lock_irq(&mapping->tree_lock);
+        write_lock_irq(&mapping->tree_lock);
         if (PageDirty(page)) {
-                spin_unlock_irq(&mapping->tree_lock);
+                write_unlock_irq(&mapping->tree_lock);
                 return 0;
         }
         __remove_from_page_cache(page);
-        spin_unlock_irq(&mapping->tree_lock);
+        write_unlock_irq(&mapping->tree_lock);
         ClearPageUptodate(page);
         page_cache_release(page);       /* pagecache ref */
         return 1;

diff -puN mm/vmscan.c~make-tree_lock-an-rwlock mm/vmscan.c
--- 25/mm/vmscan.c~make-tree_lock-an-rwlock 2004-07-05 16:07:04.913423792 -0700
+++ 25-akpm/mm/vmscan.c 2004-07-05 16:07:04.930421208 -0700
@@ -474,7 +474,7 @@ static int shrink_list(struct list_head
                 if (!mapping)
                         goto keep_locked;       /* truncate got there first */
 
-                spin_lock_irq(&mapping->tree_lock);
+                write_lock_irq(&mapping->tree_lock);
 
                 /*
                  * The non-racy check for busy page.  It is critical to check
@@ -482,7 +482,7 @@ static int shrink_list(struct list_head
                  * not in use by anybody.       (pagecache + us == 2)
                  */
                 if (page_count(page) != 2 || PageDirty(page)) {
-                        spin_unlock_irq(&mapping->tree_lock);
+                        write_unlock_irq(&mapping->tree_lock);
                         goto keep_locked;
                 }
 
@@ -490,7 +490,7 @@ static int shrink_list(struct list_head
                 if (PageSwapCache(page)) {
                         swp_entry_t swap = { .val = page->private };
                         __delete_from_swap_cache(page);
-                        spin_unlock_irq(&mapping->tree_lock);
+                        write_unlock_irq(&mapping->tree_lock);
                         swap_free(swap);
                         __put_page(page);       /* The pagecache ref */
                         goto free_it;
@@ -498,7 +498,7 @@ static int shrink_list(struct list_head
 #endif /* CONFIG_SWAP */
 
                 __remove_from_page_cache(page);
-                spin_unlock_irq(&mapping->tree_lock);
+                write_unlock_irq(&mapping->tree_lock);
                 __put_page(page);
 
 free_it:
_