Filesystem aio write

Make the generic file write path retry-friendly for aio.  Waits for
queue congestion, page locks and buffer I/O keep blocking for
synchronous callers, but when an async wait queue entry is supplied
(via current->io_wait) they register a callback and return -EIOCBRETRY
instead of sleeping.  balance_dirty_pages() and friends now return a
status so the retry can propagate up to generic_file_aio_write_nolock().

 25-akpm/drivers/block/ll_rw_blk.c |   35 ++++++++++++++++++++++++++++++-----
 25-akpm/fs/buffer.c               |    3 ++-
 25-akpm/include/linux/blkdev.h    |    1 +
 25-akpm/include/linux/writeback.h |    4 ++--
 25-akpm/mm/filemap.c              |   29 ++++++++++++++++++++++++-----
 25-akpm/mm/page-writeback.c       |   17 ++++++++++++-----
 6 files changed, 71 insertions(+), 18 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~aio-05-fs_write drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~aio-05-fs_write	Tue Jun  3 11:05:02 2003
+++ 25-akpm/drivers/block/ll_rw_blk.c	Tue Jun  3 11:05:03 2003
@@ -1625,25 +1625,50 @@ void blk_put_request(struct request *req
 }
 
 /**
- * blk_congestion_wait - wait for a queue to become uncongested
+ * blk_congestion_wait_wq - wait for a queue to become uncongested
  * @rw: READ or WRITE
  * @timeout: timeout in jiffies
+ * @wait: wait queue entry to use for waiting or async notification
+ *	(NULL defaults to synchronous behaviour)
  *
  * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
  * If no queues are congested then just wait for the next request to be
  * returned.
+ *
+ * If the wait queue parameter specifies an async i/o callback,
+ * then instead of blocking, just register the callback on the wait
+ * queue for async notification when the queue gets uncongested.
  */
-void blk_congestion_wait(int rw, long timeout)
+int blk_congestion_wait_wq(int rw, long timeout, wait_queue_t *wait)
 {
-	DEFINE_WAIT(wait);
 	wait_queue_head_t *wqh = &congestion_wqh[rw];
+	DEFINE_WAIT(local_wait);
+
+	if (!wait)
+		wait = &local_wait;
 
 	blk_run_queues();
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(wqh, wait, TASK_UNINTERRUPTIBLE);
+	if (!is_sync_wait(wait)) {
+		/*
+		 * If we've queued an async wait queue callback,
+		 * do not block; just tell the caller to return
+		 * and retry later when the callback is notified.
+		 */
+		return -EIOCBRETRY;
+	}
 	io_schedule_timeout(timeout);
-	finish_wait(wqh, &wait);
+	finish_wait(wqh, wait);
+	return 0;
+}
+
+void blk_congestion_wait(int rw, long timeout)
+{
+	blk_congestion_wait_wq(rw, timeout, NULL);
 }
+
 
 /*
  * Has to be called with the request spinlock acquired
  */
diff -puN fs/buffer.c~aio-05-fs_write fs/buffer.c
--- 25/fs/buffer.c~aio-05-fs_write	Tue Jun  3 11:05:02 2003
+++ 25-akpm/fs/buffer.c	Tue Jun  3 11:05:03 2003
@@ -1974,7 +1974,8 @@ static int __block_prepare_write(struct
 	 * If we issued read requests - let them complete.
 	 */
 	while(wait_bh > wait) {
-		wait_on_buffer(*--wait_bh);
+		if ((err = wait_on_buffer_wq(*--wait_bh, current->io_wait)))
+			return err;
 		if (!buffer_uptodate(*wait_bh))
 			return -EIO;
 	}
diff -puN include/linux/blkdev.h~aio-05-fs_write include/linux/blkdev.h
--- 25/include/linux/blkdev.h~aio-05-fs_write	Tue Jun  3 11:05:02 2003
+++ 25-akpm/include/linux/blkdev.h	Tue Jun  3 11:05:03 2003
@@ -492,6 +492,7 @@ extern void blk_queue_free_tags(request_
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern void blk_congestion_wait(int rw, long timeout);
+extern int blk_congestion_wait_wq(int rw, long timeout, wait_queue_t *wait);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 
diff -puN include/linux/writeback.h~aio-05-fs_write include/linux/writeback.h
--- 25/include/linux/writeback.h~aio-05-fs_write	Tue Jun  3 11:05:02 2003
+++ 25-akpm/include/linux/writeback.h	Tue Jun  3 11:05:03 2003
@@ -84,8 +84,8 @@ int dirty_writeback_centisecs_handler(st
 		void *, size_t *);
 
 void page_writeback_init(void);
-void balance_dirty_pages(struct address_space *mapping);
-void balance_dirty_pages_ratelimited(struct address_space *mapping);
+int balance_dirty_pages(struct address_space *mapping);
+int balance_dirty_pages_ratelimited(struct address_space *mapping);
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 
diff -puN mm/filemap.c~aio-05-fs_write mm/filemap.c
--- 25/mm/filemap.c~aio-05-fs_write	Tue Jun  3 11:05:02 2003
+++ 25-akpm/mm/filemap.c	Tue Jun  3 11:05:03 2003
@@ -449,8 +449,8 @@ struct page *find_trylock_page(struct ad
  *
  * Returns zero if the page was not present. find_lock_page() may sleep.
  */
-struct page *find_lock_page(struct address_space *mapping,
-				unsigned long offset)
+struct page *find_lock_page_wq(struct address_space *mapping,
+				unsigned long offset, wait_queue_t *wait)
 {
 	struct page *page;
 
@@ -461,7 +461,10 @@ repeat:
 		page_cache_get(page);
 		if (TestSetPageLocked(page)) {
 			spin_unlock(&mapping->page_lock);
-			lock_page(page);
+			if (-EIOCBRETRY == lock_page_wq(page, wait)) {
+				page_cache_release(page);
+				return ERR_PTR(-EIOCBRETRY);
+			}
 			spin_lock(&mapping->page_lock);
 			/* Has the page been truncated while we slept?
 			 */
@@ -476,6 +479,12 @@ repeat:
 	return page;
 }
 
+struct page *find_lock_page(struct address_space *mapping,
+				unsigned long offset)
+{
+	return find_lock_page_wq(mapping, offset, NULL);
+}
+
 /**
  * find_or_create_page - locate or add a pagecache page
  *
@@ -1439,7 +1448,9 @@ __grab_cache_page(struct address_space *
 	int err;
 	struct page *page;
 repeat:
-	page = find_lock_page(mapping, index);
+	page = find_lock_page_wq(mapping, index, current->io_wait);
+	if (IS_ERR(page))
+		return page;
 	if (!page) {
 		if (!*cached_page) {
 			*cached_page = page_cache_alloc(mapping);
@@ -1778,6 +1789,10 @@ generic_file_aio_write_nolock(struct kio
 		fault_in_pages_readable(buf, bytes);
 
 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+		if (IS_ERR(page)) {
+			status = PTR_ERR(page);
+			break;
+		}
 		if (!page) {
 			status = -ENOMEM;
 			break;
@@ -1827,7 +1842,11 @@ generic_file_aio_write_nolock(struct kio
 		page_cache_release(page);
 		if (status < 0)
 			break;
-		balance_dirty_pages_ratelimited(mapping);
+		status = balance_dirty_pages_ratelimited(mapping);
+		if (status < 0) {
+			pr_debug("async balance_dirty_pages\n");
+			break;
+		}
 		cond_resched();
 	} while (count);
 	*ppos = pos;
diff -puN mm/page-writeback.c~aio-05-fs_write mm/page-writeback.c
--- 25/mm/page-writeback.c~aio-05-fs_write	Tue Jun  3 11:05:03 2003
+++ 25-akpm/mm/page-writeback.c	Tue Jun  3 11:05:03 2003
@@ -135,7 +135,7 @@ get_dirty_limits(struct page_state *ps,
  * If we're over `background_thresh' then pdflush is woken to perform some
  * writeout.
  */
-void balance_dirty_pages(struct address_space *mapping)
+int balance_dirty_pages(struct address_space *mapping)
 {
 	struct page_state ps;
 	long nr_reclaimable;
@@ -152,6 +152,7 @@ void balance_dirty_pages(struct address_
 			.sync_mode	= WB_SYNC_NONE,
 			.older_than_this = NULL,
 			.nr_to_write	= write_chunk,
+			.nonblocking	= !is_sync_wait(current->io_wait)
 		};
 
 		get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
@@ -178,7 +179,11 @@ void balance_dirty_pages(struct address_
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
 		}
-		blk_congestion_wait(WRITE, HZ/10);
+		if (-EIOCBRETRY == blk_congestion_wait_wq(WRITE, HZ/10,
+					current->io_wait)) {
+			pr_debug("async blk congestion wait\n");
+			return -EIOCBRETRY;
+		}
 	}
 
 	if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
@@ -186,6 +191,8 @@ void balance_dirty_pages(struct address_
 
 	if (!writeback_in_progress(bdi) && nr_reclaimable > background_thresh)
 		pdflush_operation(background_writeout, 0);
+
+	return 0;
 }
 
 /**
@@ -201,7 +208,7 @@ void balance_dirty_pages(struct address_
 * decrease the ratelimiting by a lot, to prevent individual processes from
 * overshooting the limit by (ratelimit_pages) each.
 */
-void balance_dirty_pages_ratelimited(struct address_space *mapping)
+int balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
 	static DEFINE_PER_CPU(int, ratelimits) = 0;
 	int cpu;
@@ -215,10 +222,10 @@ void balance_dirty_pages_ratelimited(str
 	if (per_cpu(ratelimits, cpu)++ >= ratelimit) {
 		per_cpu(ratelimits, cpu) = 0;
 		put_cpu();
-		balance_dirty_pages(mapping);
-		return;
+		return balance_dirty_pages(mapping);
 	}
 	put_cpu();
+	return 0;
 }
 EXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited);
 
_
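
A note on the helpers used above: is_sync_wait(), lock_page_wq(),
wait_on_buffer_wq() and current->io_wait are supplied by earlier
patches in this aio series and are not defined here.  The convention
they follow is that a synchronous wait queue entry has a task to put
to sleep, while an async entry carries only a notification callback.
A minimal sketch of the assumed test, along these lines:

	/*
	 * Sketch only, not part of this patch: a NULL entry (or an
	 * entry with a task behind it) means the caller really
	 * sleeps; wait->task == NULL marks an async callback entry,
	 * so blocking primitives return -EIOCBRETRY instead.
	 */
	#define is_sync_wait(wait)	(!(wait) || ((wait)->task))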
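
For completeness, this is roughly how the -EIOCBRETRY convention is
meant to be consumed by the aio core.  The sketch below is
illustrative only: the ki_wait field and the retry-on-wakeup queueing
are assumptions about the surrounding aio retry infrastructure, not
code from this patch.

	/*
	 * Hypothetical caller: run an aio write with current->io_wait
	 * pointing at the iocb's wait queue entry, so that waits deep
	 * in the write path queue a wakeup callback and back out with
	 * -EIOCBRETRY rather than blocking the submitting task.
	 */
	static ssize_t aio_write_sketch(struct kiocb *iocb,
			const char __user *buf, size_t count, loff_t pos)
	{
		struct file *file = iocb->ki_filp;
		ssize_t ret;

		current->io_wait = &iocb->ki_wait;	/* assumed field */
		ret = file->f_op->aio_write(iocb, buf, count, pos);
		current->io_wait = NULL;	/* back to synchronous waits */

		if (ret == -EIOCBRETRY)
			return ret;	/* callback queued; retried on wakeup */
		aio_complete(iocb, ret, 0);	/* done, or a real error */
		return ret;
	}

The important property is that -EIOCBRETRY is only returned after the
callback has been registered on the relevant wait queue (note that
blk_congestion_wait_wq calls prepare_to_wait before bailing out), so a
wakeup cannot be lost between backing out and retrying.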