The radix-tree walk for writeback has a couple of problems: a) It always scans a file from its first dirty page, so if someone is repeatedly dirtying the front part of a file, pages near the end may be starved of writeout. (Well, not completely: the `kupdate' function will write an entire file once the file's dirty timestamp has expired.) b) When the disk queues are huge (10000 requests), there can be a very large number of locked pages. Scanning past these in writeback consumes quite some CPU time. So in each address_space we record the index at which the last batch of writeout terminated and start the next batch of writeback from that point. --- 25-akpm/fs/mpage.c | 20 +++++++++++++++++++- 25-akpm/include/linux/fs.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff -puN fs/mpage.c~writeback-search-start fs/mpage.c --- 25/fs/mpage.c~writeback-search-start 2004-03-23 22:10:45.209131632 -0800 +++ 25-akpm/fs/mpage.c 2004-03-23 22:13:41.072396352 -0800 @@ -610,6 +610,7 @@ mpage_writepages(struct address_space *m struct pagevec pvec; int nr_pages; pgoff_t index; + int scanned = 0; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -621,11 +622,18 @@ mpage_writepages(struct address_space *m writepage = mapping->a_ops->writepage; pagevec_init(&pvec, 0); - index = 0; + if (wbc->sync_mode != WB_SYNC_NONE) { + index = 0; + scanned = 1; + } else { + index = mapping->writeback_index; + } +retry: while (!done && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))) { unsigned i; + scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -672,6 +680,16 @@ mpage_writepages(struct address_space *m } pagevec_release(&pvec); } + if (scanned == 0 && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + mapping->writeback_index = index; if (bio) 
mpage_bio_submit(WRITE, bio); return ret; diff -puN include/linux/fs.h~writeback-search-start include/linux/fs.h --- 25/include/linux/fs.h~writeback-search-start 2004-03-23 22:10:45.211131328 -0800 +++ 25-akpm/include/linux/fs.h 2004-03-23 22:10:45.216130568 -0800 @@ -327,6 +327,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and spinlock protecting it */ unsigned long nrpages; /* number of total pages */ + pgoff_t writeback_index;/* writeback starts here */ struct address_space_operations *a_ops; /* methods */ struct list_head i_mmap; /* list of private mappings */ struct list_head i_mmap_shared; /* list of shared mappings */ _