The radix-tree walk for writeback has a couple of problems: a) It always scans a file from its first dirty page, so if someone is repeatedly dirtying the front part of a file, pages near the end may be starved of writeout. (Not completely starved: the `kupdate' function will write out an entire file once the file's dirty timestamp has expired.) b) When the disk queues are huge (10000 requests), there can be a very large number of locked pages, and scanning past these in writeback consumes a significant amount of CPU time. So in each address_space we record the index at which the last batch of writeout terminated and start the next batch of WB_SYNC_NONE writeback from that point. If that scan finds no dirty pages between the recorded index and the end of the file, it wraps back to the start of the file once. Writeback in any other sync mode still sweeps the whole file from index 0. --- 25-akpm/fs/mpage.c | 20 +++++++++++++++++++- 25-akpm/include/linux/fs.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff -puN fs/mpage.c~writeback-search-start fs/mpage.c --- 25/fs/mpage.c~writeback-search-start 2004-04-03 03:00:16.981649952 -0800 +++ 25-akpm/fs/mpage.c 2004-04-03 03:00:16.986649192 -0800 @@ -610,6 +610,7 @@ mpage_writepages(struct address_space *m struct pagevec pvec; int nr_pages; pgoff_t index; + int scanned = 0; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -621,11 +622,18 @@ mpage_writepages(struct address_space *m writepage = mapping->a_ops->writepage; pagevec_init(&pvec, 0); - index = 0; + if (wbc->sync_mode == WB_SYNC_NONE) { + index = mapping->writeback_index; /* Start from prev offset */ + } else { + index = 0; /* whole-file sweep */ + scanned = 1; + } +retry: while (!done && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))) { unsigned i; + scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -672,6 +680,16 @@ mpage_writepages(struct address_space *m } pagevec_release(&pvec); } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + 
mapping->writeback_index = index; if (bio) mpage_bio_submit(WRITE, bio); return ret; diff -puN include/linux/fs.h~writeback-search-start include/linux/fs.h --- 25/include/linux/fs.h~writeback-search-start 2004-04-03 03:00:16.983649648 -0800 +++ 25-akpm/include/linux/fs.h 2004-04-03 03:00:16.988648888 -0800 @@ -324,6 +324,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and spinlock protecting it */ unsigned long nrpages; /* number of total pages */ + pgoff_t writeback_index;/* writeback starts here */ struct address_space_operations *a_ops; /* methods */ struct list_head i_mmap; /* list of private mappings */ struct list_head i_mmap_shared; /* list of shared mappings */ _