From: Suparna Bhattacharya The open-coded readahead logic that was added in aio_pread is best avoided if possible. First, duplicating similar checks across the sync and aio paths (e.g. checking for O_DIRECT), and letting the logic of these paths diverge, is not good from a long-term maintainability standpoint. Secondly, this logic really belongs in the generic fops methods for aio_read rather than in the high-level aio handlers; it should be possible for a filesystem to override the logic with its own if suitable. So, this patch moves the readahead out of aio_pread, and instead modifies do_generic_mapping_read to read ahead _all_ the pages in the requested range upfront, before it starts waiting for any of the pages to become uptodate. This leads to sane readahead behaviour for the kind of i/o patterns generated by streaming aio reads. It also takes care not to issue the readahead again on subsequent AIO retries of the same request. fs/aio.c | 19 +------------------ include/linux/aio.h | 3 +++ mm/filemap.c | 30 +++++++++++++++++++++++------- 3 files changed, 27 insertions(+), 25 deletions(-) diff -puN fs/aio.c~aio-readahead-rework fs/aio.c --- 25/fs/aio.c~aio-readahead-rework 2003-08-02 12:48:05.000000000 -0700 +++ 25-akpm/fs/aio.c 2003-08-02 12:48:05.000000000 -0700 @@ -1362,25 +1362,8 @@ ssize_t aio_setup_iocb(struct kiocb *kio kiocb->ki_left))) break; ret = -EINVAL; - if (file->f_op->aio_read) { - /* - * Do not do readahead for DIRECT i/o - */ - if (!(file->f_flags & O_DIRECT)) { - struct address_space *mapping; - unsigned long index; - unsigned long end; - - mapping = file->f_dentry->d_inode->i_mapping; - index = kiocb->ki_pos >> PAGE_CACHE_SHIFT; - end = (kiocb->ki_pos + kiocb->ki_left) >> - PAGE_CACHE_SHIFT; - for (; index < end; index++) - page_cache_readahead(mapping, - &file->f_ra, file, index); - } + if (file->f_op->aio_read) kiocb->ki_retry = aio_pread; - } break; case IOCB_CMD_PWRITE: ret = -EBADF; diff -puN include/linux/aio.h~aio-readahead-rework 
include/linux/aio.h --- 25/include/linux/aio.h~aio-readahead-rework 2003-08-02 12:48:05.000000000 -0700 +++ 25-akpm/include/linux/aio.h 2003-08-02 12:48:05.000000000 -0700 @@ -179,6 +179,9 @@ int FASTCALL(io_submit_one(struct kioctx dump_stack(); \ } +#define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) +#define is_retried_kiocb(iocb) ((iocb)->ki_retried > 1) + #include static inline struct kiocb *list_kiocb(struct list_head *h) diff -puN mm/filemap.c~aio-readahead-rework mm/filemap.c --- 25/mm/filemap.c~aio-readahead-rework 2003-08-02 12:48:05.000000000 -0700 +++ 25-akpm/mm/filemap.c 2003-08-02 12:48:05.000000000 -0700 @@ -608,21 +608,39 @@ void do_generic_mapping_read(struct addr read_actor_t actor) { struct inode *inode = mapping->host; - unsigned long index, offset; + unsigned long index, offset, last, end_index; struct page *cached_page; + loff_t isize = i_size_read(inode); int error; cached_page = NULL; index = *ppos >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; + last = (*ppos + desc->count) >> PAGE_CACHE_SHIFT; + end_index = isize >> PAGE_CACHE_SHIFT; + if (last > end_index) + last = end_index; + + /* Don't repeat the readahead if we are executing aio retries */ + if (in_aio()) { + if (is_retried_kiocb(io_wait_to_kiocb(current->io_wait))) + goto done_readahead; + } + + /* + * Let the readahead logic know upfront about all + * the pages we'll need to satisfy this request + */ + for (; index < last; index++) + page_cache_readahead(mapping, ra, filp, index); + index = *ppos >> PAGE_CACHE_SHIFT; + +done_readahead: for (;;) { struct page *page; - unsigned long end_index, nr, ret; - loff_t isize = i_size_read(inode); + unsigned long nr, ret; - end_index = isize >> PAGE_CACHE_SHIFT; - if (index > end_index) break; nr = PAGE_CACHE_SIZE; @@ -633,8 +651,6 @@ void do_generic_mapping_read(struct addr } cond_resched(); - if (is_sync_wait(current->io_wait)) - page_cache_readahead(mapping, ra, filp, index); nr = nr - offset; find_page: _