From: Ram Pai

Signed-off-by: Andrew Morton
---

 25-akpm/mm/readahead.c |  124 ++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 92 insertions(+), 32 deletions(-)

diff -puN mm/readahead.c~readahead-congestion-control mm/readahead.c
--- 25/mm/readahead.c~readahead-congestion-control	2004-12-04 00:07:22.283187816 -0800
+++ 25-akpm/mm/readahead.c	2004-12-04 00:07:22.288187056 -0800
@@ -249,6 +249,9 @@ out:
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages requested, or the maximum amount of I/O allowed.
+ *
+ * do_page_cache_readahead() returns -1 if it encountered request queue
+ * congestion.
  */
 static inline int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
@@ -359,13 +362,45 @@ int check_ra_success(struct file_ra_stat
 }
 
 /*
- * Issue the I/O.  If pages already in cache, increment the hit count until
- * we exceed max, then turn RA off until we start missing again.
+ * This version skips the IO if the queue is read-congested, and will tell the
+ * block layer to abandon the readahead if request allocation would block.
+ *
+ * force_page_cache_readahead() will ignore queue congestion and will block on
+ * request queues.
  */
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read)
 {
-	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+	if (bdi_read_congested(mapping->backing_dev_info))
+		return -1;
+
+	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+}
+
+/*
+ * Read 'nr_to_read' pages starting at page 'offset'.  If the flag 'block'
+ * is set, wait until the read completes.  Otherwise attempt to read without
+ * blocking.
+ * Returns 1, meaning 'success', if the read completed without switching
+ * off readahead mode.  Otherwise returns failure.
+ */
+static int
+blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
+			unsigned long offset, unsigned long nr_to_read,
+			struct file_ra_state *ra, int block)
+{
+	int actual;
+
+	if (block) {
+		actual = __do_page_cache_readahead(mapping, filp,
+				offset, nr_to_read);
+	} else {
+		actual = do_page_cache_readahead(mapping, filp,
+				offset, nr_to_read);
+		if (actual == -1)
+			return 0;
+	}
+	return check_ra_success(ra, nr_to_read, actual);
 }
 
 /*
@@ -379,7 +414,7 @@ page_cache_readahead(struct address_spac
 {
 	unsigned long max, min;
 	unsigned long newsize = req_size;
-	unsigned long actual=0;
+	unsigned long block;
 
 	/*
 	 * Here we detect the case where the application is performing
@@ -392,7 +427,6 @@ page_cache_readahead(struct address_spac
 
 	max = get_max_readahead(ra);
 	min = get_min_readahead(ra);
-//	maxsane = max_sane_readahead(max);
 	newsize = min(req_size, max);
 
 	if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) {
@@ -407,12 +441,11 @@ page_cache_readahead(struct address_spac
 	if ((ra->size == 0 && offset == 0)	/* first io and start of file */
 	    || (ra->size == -1 && ra->prev_page == offset - 1)) {
 		/* First sequential */
-		ra->prev_page = offset + newsize-1;
+		ra->prev_page = offset + newsize - 1;
 		ra->size = get_init_ra_size(newsize, max);
 		ra->start = offset;
-		actual = do_page_cache_readahead(mapping, filp, offset,
-						 ra->size);
-		if (!check_ra_success(ra, ra->size, actual))
+		if (!blockable_page_cache_readahead(mapping, filp, offset,
+						 ra->size, ra, 1))
 			goto out;
 
 		/*
@@ -427,9 +460,8 @@ page_cache_readahead(struct address_spac
 			ra->ahead_size = get_next_ra_size(ra->size, max, min,
 							  &ra->flags);
 			ra->ahead_start = ra->start + ra->size;
-			actual = do_page_cache_readahead(mapping, filp,
-					 ra->ahead_start, ra->ahead_size);
-			check_ra_success(ra, ra->ahead_size, actual);
+			blockable_page_cache_readahead(mapping, filp,
+					 ra->ahead_start, ra->ahead_size, ra, 1);
 		}
 		goto out;
 	}
@@ -441,10 +473,9 @@ page_cache_readahead(struct address_spac
 	 */
 	if ((offset != (ra->prev_page+1) || (ra->size == 0))) {
 		ra_off(ra);
-		ra->prev_page = offset + newsize-1;
-		actual = do_page_cache_readahead(mapping, filp, offset,
-						 newsize);
-		check_ra_success(ra, newsize, actual);
+		ra->prev_page = offset + newsize - 1;
+		blockable_page_cache_readahead(mapping, filp, offset,
+						 newsize, ra, 1);
 		goto out;
 	}
 
@@ -454,33 +485,62 @@ page_cache_readahead(struct address_spac
 	 */
 	if (ra->ahead_start == 0) {		/* no ahead window yet */
-		ra->ahead_size = get_next_ra_size(max(newsize,ra->size),
-						  max, min, &ra->flags);
+		ra->ahead_size = get_next_ra_size(ra->size, max, min,
+						  &ra->flags);
 		ra->ahead_start = ra->start + ra->size;
-		newsize = min (newsize, ra->ahead_start - offset);
-		actual = do_page_cache_readahead(mapping, filp,
-					 ra->ahead_start, ra->ahead_size);
-		if (!check_ra_success(ra, ra->ahead_size, actual))
+		block = ((offset + newsize - 1) >= ra->ahead_start);
+		if (!blockable_page_cache_readahead(mapping, filp,
+				ra->ahead_start, ra->ahead_size, ra, block)) {
+			/* A read failure in blocking mode implies the pages
+			 * are all cached, so we can safely assume we have
+			 * taken care of all the pages requested in this call.
+			 * A read failure in non-blocking mode implies we are
+			 * reading more pages than requested in this call, so
+			 * we can safely assume we have taken care of all the
+			 * pages requested in this call.
+			 *
+			 * Just reset the ahead window in case we failed due to
+			 * congestion.  The ahead window will be closed anyway
+			 * if we failed due to excessive page cache hits.
+			 */
+			ra->ahead_start = 0;
+			ra->ahead_size = 0;
 			goto out;
+		}
 	}
 
 	/*
-	 * Already have an ahead window, check if we crossed into it if so,
-	 * shift windows and issue a new ahead window.
-	 * Only return the #pages that are in the current window, so that we
-	 * get called back on the first page of the ahead window which will
-	 * allow us to submit more IO.
+	 * Already have an ahead window, check if we crossed into it.
+	 * If so, shift windows and issue a new ahead window.
+	 * Only return the #pages that are in the current window, so that
+	 * we get called back on the first page of the ahead window which
+	 * will allow us to submit more IO.
 	 */
-	if ((offset + newsize -1) >= ra->ahead_start) {
+	if ((offset + newsize - 1) >= ra->ahead_start) {
 		ra->start = ra->ahead_start;
 		ra->size = ra->ahead_size;
 		ra->ahead_start = ra->ahead_start + ra->ahead_size;
 		ra->ahead_size = get_next_ra_size(ra->ahead_size,
 						  max, min, &ra->flags);
-		newsize = min (newsize, ra->ahead_start - offset);
-		actual = do_page_cache_readahead(mapping, filp,
-					 ra->ahead_start, ra->ahead_size);
-		check_ra_success(ra, ra->ahead_size, actual);
+		block = ((offset + newsize - 1) >= ra->ahead_start);
+		if (!blockable_page_cache_readahead(mapping, filp,
+				ra->ahead_start, ra->ahead_size, ra, block)) {
+			/* A read failure in blocking mode implies the pages
+			 * are all cached, so we can safely assume we have
+			 * taken care of all the pages requested in this call.
+			 * A read failure in non-blocking mode implies we are
+			 * reading more pages than requested in this call, so
+			 * we can safely assume we have taken care of all the
+			 * pages requested in this call.
+			 *
+			 * Just reset the ahead window in case we failed due to
+			 * congestion.  The ahead window will be closed anyway
+			 * if we failed due to excessive page cache hits.
+			 */
+			ra->ahead_start = 0;
+			ra->ahead_size = 0;
+		}
 	}
+
 out:
 	ra->prev_page = offset + newsize - 1;
 	return(newsize);
_
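
The patch funnels every readahead submission through blockable_page_cache_readahead(): callers that cannot make progress without the pages pass block=1 and always issue the I/O, while opportunistic ahead-window reads pass block=0 and give up early under congestion. What follows is a minimal userspace sketch of that decision, not the kernel code: queue_congested() and submit_readahead() are hypothetical stubs standing in for bdi_read_congested() and __do_page_cache_readahead().

	#include <stdio.h>

	/* Hypothetical stubs for illustration only. */
	static int queue_congested(void)
	{
		return 1;		/* pretend the device queue is congested */
	}

	static int submit_readahead(unsigned long offset, unsigned long nr)
	{
		printf("reading %lu pages at offset %lu\n", nr, offset);
		return (int)nr;		/* pages actually submitted */
	}

	/*
	 * Same shape as blockable_page_cache_readahead(): in blocking mode,
	 * always submit; in non-blocking mode, return 0 ("failure") without
	 * doing any I/O if the queue is congested, so the caller can reset
	 * its ahead window and try again on a later call.
	 */
	static int blockable_readahead(unsigned long offset, unsigned long nr,
				       int block)
	{
		if (!block && queue_congested())
			return 0;
		return submit_readahead(offset, nr) > 0;
	}

	int main(void)
	{
		if (!blockable_readahead(100, 32, 0))
			printf("non-blocking readahead abandoned (congestion)\n");
		blockable_readahead(100, 32, 1);  /* blocking: always issues I/O */
		return 0;
	}

The design point to note is that a non-blocking failure is deliberately cheap: no I/O is queued, only ra->ahead_start and ra->ahead_size are zeroed, and the ahead window is rebuilt on a subsequent call once the queue drains.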