Filesystem aio write 25-akpm/drivers/block/ll_rw_blk.c | 35 +++++++++++++++++++++++++++++------ 25-akpm/fs/buffer.c | 4 +++- 25-akpm/include/linux/blkdev.h | 1 + 25-akpm/include/linux/writeback.h | 2 +- 25-akpm/mm/filemap.c | 29 ++++++++++++++++++++++++----- 25-akpm/mm/page-writeback.c | 17 ++++++++++++----- 6 files changed, 70 insertions(+), 18 deletions(-) diff -puN drivers/block/ll_rw_blk.c~aio-05-fs_write drivers/block/ll_rw_blk.c --- 25/drivers/block/ll_rw_blk.c~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/drivers/block/ll_rw_blk.c Wed Oct 29 12:17:02 2003 @@ -1865,29 +1865,52 @@ void blk_put_request(struct request *req spin_unlock_irqrestore(q->queue_lock, flags); } } - EXPORT_SYMBOL(blk_put_request); /** - * blk_congestion_wait - wait for a queue to become uncongested + * blk_congestion_wait_wq - wait for a queue to become uncongested * @rw: READ or WRITE * @timeout: timeout in jiffies + * @wait: wait queue entry to use for waiting or async notification + * (NULL defaults to synchronous behaviour) * * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. * If no queues are congested then just wait for the next request to be * returned. + * + * If the wait queue parameter specifies an async i/o callback, + * then instead of blocking, just register the callback on the wait + * queue for async notification when the queue gets uncongested. 
*/ -void blk_congestion_wait(int rw, long timeout) +int blk_congestion_wait_wq(int rw, long timeout, wait_queue_t *wait) { - DEFINE_WAIT(wait); wait_queue_head_t *wqh = &congestion_wqh[rw]; + DEFINE_WAIT(local_wait); + + if (!wait) + wait = &local_wait; blk_run_queues(); - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wqh, wait, TASK_UNINTERRUPTIBLE); + if (!is_sync_wait(wait)) { + /* + * if we've queued an async wait queue + * callback do not block; just tell the + * caller to return and retry later when + * the callback is notified + */ + return -EIOCBRETRY; + } io_schedule_timeout(timeout); - finish_wait(wqh, &wait); + finish_wait(wqh, wait); + return 0; } +EXPORT_SYMBOL(blk_congestion_wait_wq); +void blk_congestion_wait(int rw, long timeout) +{ + blk_congestion_wait_wq(rw, timeout, NULL); +} EXPORT_SYMBOL(blk_congestion_wait); /* diff -puN fs/buffer.c~aio-05-fs_write fs/buffer.c --- 25/fs/buffer.c~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/fs/buffer.c Wed Oct 29 12:17:06 2003 @@ -2001,7 +2001,8 @@ static int __block_prepare_write(struct * If we issued read requests - let them complete. 
*/ while(wait_bh > wait) { - wait_on_buffer(*--wait_bh); + if ((err = wait_on_buffer_wq(*--wait_bh, current->io_wait))) + return err; if (!buffer_uptodate(*wait_bh)) return -EIO; } @@ -3059,6 +3060,7 @@ void __init buffer_init(void) EXPORT_SYMBOL(__bforget); EXPORT_SYMBOL(__brelse); EXPORT_SYMBOL(__wait_on_buffer); +EXPORT_SYMBOL(__wait_on_buffer_wq); EXPORT_SYMBOL(block_commit_write); EXPORT_SYMBOL(block_prepare_write); EXPORT_SYMBOL(block_read_full_page); diff -puN include/linux/blkdev.h~aio-05-fs_write include/linux/blkdev.h --- 25/include/linux/blkdev.h~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/include/linux/blkdev.h Wed Oct 29 12:17:02 2003 @@ -585,6 +585,7 @@ extern void blk_queue_free_tags(request_ extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern void blk_congestion_wait(int rw, long timeout); +extern int blk_congestion_wait_wq(int rw, long timeout, wait_queue_t *wait); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern void blk_rq_prep_restart(struct request *); diff -puN include/linux/writeback.h~aio-05-fs_write include/linux/writeback.h --- 25/include/linux/writeback.h~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/include/linux/writeback.h Wed Oct 29 12:17:02 2003 @@ -84,7 +84,7 @@ int dirty_writeback_centisecs_handler(st void __user *, size_t *); void page_writeback_init(void); -void balance_dirty_pages_ratelimited(struct address_space *mapping); +int balance_dirty_pages_ratelimited(struct address_space *mapping); int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); diff -puN mm/filemap.c~aio-05-fs_write mm/filemap.c --- 25/mm/filemap.c~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/mm/filemap.c Wed Oct 29 12:17:03 2003 @@ -494,8 +494,8 @@ EXPORT_SYMBOL(find_trylock_page); * * Returns zero if the page was not present. 
find_lock_page() may sleep. */ -struct page *find_lock_page(struct address_space *mapping, - unsigned long offset) +struct page *find_lock_page_wq(struct address_space *mapping, + unsigned long offset, wait_queue_t *wait) { struct page *page; @@ -506,7 +506,10 @@ repeat: page_cache_get(page); if (TestSetPageLocked(page)) { spin_unlock(&mapping->page_lock); - lock_page(page); + if (-EIOCBRETRY == lock_page_wq(page, wait)) { + page_cache_release(page); + return ERR_PTR(-EIOCBRETRY); + } spin_lock(&mapping->page_lock); /* Has the page been truncated while we slept? */ @@ -523,6 +526,12 @@ repeat: EXPORT_SYMBOL(find_lock_page); +struct page *find_lock_page(struct address_space *mapping, + unsigned long offset) +{ + return find_lock_page_wq(mapping, offset, NULL); +} + /** * find_or_create_page - locate or add a pagecache page * @@ -1545,7 +1554,9 @@ __grab_cache_page(struct address_space * int err; struct page *page; repeat: - page = find_lock_page(mapping, index); + page = find_lock_page_wq(mapping, index, current->io_wait); + if (IS_ERR(page)) + return page; if (!page) { if (!*cached_page) { *cached_page = page_cache_alloc(mapping); @@ -1897,6 +1908,10 @@ generic_file_aio_write_nolock(struct kio fault_in_pages_readable(buf, bytes); page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); + if (IS_ERR(page)) { + status = PTR_ERR(page); + break; + } if (!page) { status = -ENOMEM; break; @@ -1945,7 +1960,11 @@ generic_file_aio_write_nolock(struct kio page_cache_release(page); if (status < 0) break; - balance_dirty_pages_ratelimited(mapping); + status = balance_dirty_pages_ratelimited(mapping); + if (status < 0) { + pr_debug("async balance_dirty_pages\n"); + break; + } cond_resched(); } while (count); *ppos = pos; diff -puN mm/page-writeback.c~aio-05-fs_write mm/page-writeback.c --- 25/mm/page-writeback.c~aio-05-fs_write Wed Oct 29 12:17:02 2003 +++ 25-akpm/mm/page-writeback.c Wed Oct 29 12:17:03 2003 @@ -146,7 +146,7 @@ get_dirty_limits(struct page_state *ps, * 
If we're over `background_thresh' then pdflush is woken to perform some * writeout. */ -static void balance_dirty_pages(struct address_space *mapping) +static int balance_dirty_pages(struct address_space *mapping) { struct page_state ps; long nr_reclaimable; @@ -163,6 +163,7 @@ static void balance_dirty_pages(struct a .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, + .nonblocking = !is_sync_wait(current->io_wait) }; get_dirty_limits(&ps, &background_thresh, &dirty_thresh); @@ -189,7 +190,11 @@ static void balance_dirty_pages(struct a if (pages_written >= write_chunk) break; /* We've done our duty */ } - blk_congestion_wait(WRITE, HZ/10); + if (-EIOCBRETRY == blk_congestion_wait_wq(WRITE, HZ/10, + current->io_wait)) { + pr_debug("async blk congestion wait\n"); + return -EIOCBRETRY; + } } if (nr_reclaimable + ps.nr_writeback <= dirty_thresh) @@ -197,6 +202,8 @@ static void balance_dirty_pages(struct a if (!writeback_in_progress(bdi) && nr_reclaimable > background_thresh) pdflush_operation(background_writeout, 0); + + return 0; } /** @@ -212,7 +219,7 @@ static void balance_dirty_pages(struct a * decrease the ratelimiting by a lot, to prevent individual processes from * overshooting the limit by (ratelimit_pages) each. */ -void balance_dirty_pages_ratelimited(struct address_space *mapping) +int balance_dirty_pages_ratelimited(struct address_space *mapping) { static DEFINE_PER_CPU(int, ratelimits) = 0; long ratelimit; @@ -228,10 +235,10 @@ void balance_dirty_pages_ratelimited(str if (get_cpu_var(ratelimits)++ >= ratelimit) { __get_cpu_var(ratelimits) = 0; put_cpu_var(ratelimits); - balance_dirty_pages(mapping); - return; + return balance_dirty_pages(mapping); } put_cpu_var(ratelimits); + return 0; } /* _