Instead, use a radix-tree walk of the pages which are tagged as being under writeback. The new function wait_on_page_writeback_range() was generalised out of filemap_fdatawait(). We can later use this to provide concurrent fsync of just a section of a file. --- 25-akpm/fs/inode.c | 1 25-akpm/include/linux/fs.h | 1 25-akpm/include/linux/mm.h | 2 - 25-akpm/mm/filemap.c | 74 ++++++++++++++++++++++----------------------- 25-akpm/mm/swap_state.c | 1 mm/page-writeback.c | 0 mm/truncate.c | 0 mm/vmscan.c | 0 8 files changed, 38 insertions(+), 41 deletions(-) diff -puN fs/inode.c~stop-using-locked-pages fs/inode.c --- 25/fs/inode.c~stop-using-locked-pages 2004-04-03 03:00:13.167229832 -0800 +++ 25-akpm/fs/inode.c 2004-04-03 03:00:13.180227856 -0800 @@ -179,7 +179,6 @@ void inode_init_once(struct inode *inode memset(inode, 0, sizeof(*inode)); INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_data.clean_pages); - INIT_LIST_HEAD(&inode->i_data.locked_pages); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); diff -puN mm/filemap.c~stop-using-locked-pages mm/filemap.c --- 25/mm/filemap.c~stop-using-locked-pages 2004-04-03 03:00:13.168229680 -0800 +++ 25-akpm/mm/filemap.c 2004-04-03 03:00:13.181227704 -0800 @@ -154,7 +154,6 @@ int filemap_fdatawrite(struct address_sp { return __filemap_fdatawrite(mapping, WB_SYNC_ALL); } - EXPORT_SYMBOL(filemap_fdatawrite); /* @@ -165,51 +164,40 @@ int filemap_flush(struct address_space * { return __filemap_fdatawrite(mapping, WB_SYNC_NONE); } - EXPORT_SYMBOL(filemap_flush); -/** - * filemap_fdatawait - walk the list of locked pages of the given address - * space and wait for all of them. - * @mapping: address space structure to wait for +/* + * Wait for writeback to complete against pages indexed by start->end + * inclusive */ -int filemap_fdatawait(struct address_space * mapping) +static int wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) { + struct pagevec pvec; + int nr_pages; int ret = 0; - int progress; - -restart: - progress = 0; - spin_lock_irq(&mapping->tree_lock); - while (!list_empty(&mapping->locked_pages)) { - struct page *page; + pgoff_t index; - page = list_entry(mapping->locked_pages.next,struct page,list); - list_del_init(&page->list); + if (end < start) + return 0; - if (!PageWriteback(page)) { - if (++progress > 32) { - if (need_resched()) { - spin_unlock_irq(&mapping->tree_lock); - __cond_resched(); - goto restart; - } - } - continue; + pagevec_init(&pvec, 0); + index = start; + while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + wait_on_page_writeback(page); + if (PageError(page)) + ret = -EIO; } - - progress = 0; - page_cache_get(page); - spin_unlock_irq(&mapping->tree_lock); - - wait_on_page_writeback(page); - if (PageError(page)) - ret = -EIO; - - page_cache_release(page); - spin_lock_irq(&mapping->tree_lock); + pagevec_release(&pvec); + cond_resched(); } - spin_unlock_irq(&mapping->tree_lock); /* Check for outstanding write errors */ if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) @@ -219,6 +207,18 @@ restart: return ret; } + +/** + * filemap_fdatawait - walk the list of under-writeback pages of the given + * address space and wait for all of them. + * + * @mapping: address space structure to wait for + */ +int filemap_fdatawait(struct address_space *mapping) +{ + return wait_on_page_writeback_range(mapping, 0, -1); +} + EXPORT_SYMBOL(filemap_fdatawait); int filemap_write_and_wait(struct address_space *mapping) diff -puN mm/page-writeback.c~stop-using-locked-pages mm/page-writeback.c diff -puN mm/swap_state.c~stop-using-locked-pages mm/swap_state.c --- 25/mm/swap_state.c~stop-using-locked-pages 2004-04-03 03:00:13.171229224 -0800 +++ 25-akpm/mm/swap_state.c 2004-04-03 03:00:13.182227552 -0800 @@ -27,7 +27,6 @@ struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC), .tree_lock = SPIN_LOCK_UNLOCKED, .clean_pages = LIST_HEAD_INIT(swapper_space.clean_pages), - .locked_pages = LIST_HEAD_INIT(swapper_space.locked_pages), .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, .i_mmap = LIST_HEAD_INIT(swapper_space.i_mmap), diff -puN mm/truncate.c~stop-using-locked-pages mm/truncate.c diff -puN mm/vmscan.c~stop-using-locked-pages mm/vmscan.c diff -puN include/linux/fs.h~stop-using-locked-pages include/linux/fs.h --- 25/include/linux/fs.h~stop-using-locked-pages 2004-04-03 03:00:13.174228768 -0800 +++ 25-akpm/include/linux/fs.h 2004-04-03 03:00:13.184227248 -0800 @@ -324,7 +324,6 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and spinlock protecting it */ struct list_head clean_pages; /* list of clean pages */ - struct list_head locked_pages; /* list of locked pages */ unsigned long nrpages; /* number of total pages */ struct address_space_operations *a_ops; /* methods */ struct list_head i_mmap; /* list of private mappings */ diff -puN include/linux/mm.h~stop-using-locked-pages include/linux/mm.h --- 25/include/linux/mm.h~stop-using-locked-pages 2004-04-03 03:00:13.176228464 -0800 +++ 25-akpm/include/linux/mm.h 2004-04-03 03:00:13.185227096 -0800 @@ -182,7 +182,7 @@ struct page { atomic_t count; /* Usage count, see below. */ struct list_head list; /* ->mapping has some page lists. */ struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ + pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list; protected by zone->lru_lock !! */ union { _