From: Ram Pai Here is a consolidated readahead patch that takes care of the performance regression seen with multiple threaded writes to the same file descriptor. The patch does the following: 1. Instead of calculating the average count of sequential access in the read patterns, it calculates the average amount of hits in the current window. 2. This average is used to guide the size of the next current window. 3. Since the field serial_cnt in the ra structure does not make sense with the introduction of the new logic, I have renamed that field as currnt_wnd_hit. This patch will help the read patterns that are not necessarily sequential but have sufficient locality. However, it may regress random workloads. Results: 1. Berkley Shands has reported great performance with this patch. 2. iozone showed negligible effect on various read patterns. 3. DSS workload saw negligible change. 4. Sysbench saw a small improvement. Signed-off-by: Andrew Morton --- 25-akpm/include/linux/fs.h | 4 ++-- 25-akpm/mm/readahead.c | 37 ++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff -puN include/linux/fs.h~consolidated-readahead-fixes include/linux/fs.h --- 25/include/linux/fs.h~consolidated-readahead-fixes 2004-08-16 13:48:45.134615088 -0700 +++ 25-akpm/include/linux/fs.h 2004-08-16 13:48:45.141614024 -0700 @@ -556,8 +556,8 @@ struct file_ra_state { unsigned long prev_page; /* Cache last read() position */ unsigned long ahead_start; /* Ahead window */ unsigned long ahead_size; - unsigned long serial_cnt; /* measure of sequentiality */ - unsigned long average; /* another measure of sequentiality */ + unsigned long currnt_wnd_hit; /* locality in the current window */ + unsigned long average; /* size of next current window */ unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ diff -puN 
mm/readahead.c~consolidated-readahead-fixes mm/readahead.c --- 25/mm/readahead.c~consolidated-readahead-fixes 2004-08-16 13:48:45.136614784 -0700 +++ 25-akpm/mm/readahead.c 2004-08-16 13:48:45.143613720 -0700 @@ -384,25 +384,10 @@ page_cache_readahead(struct address_spac first_access=1; ra->next_size = max / 2; ra->prev_page = offset; - ra->serial_cnt++; + ra->currnt_wnd_hit++; goto do_io; } - if (offset == ra->prev_page + 1) { - if (ra->serial_cnt <= (max * 2)) - ra->serial_cnt++; - } else { - /* - * to avoid rounding errors, ensure that 'average' - * tends towards the value of ra->serial_cnt. - */ - average = ra->average; - if (average < ra->serial_cnt) { - average++; - } - ra->average = (average + ra->serial_cnt) / 2; - ra->serial_cnt = 1; - } ra->prev_page = offset; if (offset >= ra->start && offset <= (ra->start + ra->size)) { @@ -411,12 +396,22 @@ page_cache_readahead(struct address_spac * page beyond the end. Expand the next readahead size. */ ra->next_size += 2; + + if (ra->currnt_wnd_hit <= (max * 2)) + ra->currnt_wnd_hit++; } else { /* * A miss - lseek, pagefault, pread, etc. Shrink the readahead * window. */ ra->next_size -= 2; + + average = ra->average; + if (average < ra->currnt_wnd_hit) { + average++; + } + ra->average = (average + ra->currnt_wnd_hit) / 2; + ra->currnt_wnd_hit = 1; } if ((long)ra->next_size > (long)max) @@ -468,7 +463,11 @@ do_io: * pages shall be accessed in the next * current window. */ - ra->next_size = min(ra->average , (unsigned long)max); + average = ra->average; + if (ra->currnt_wnd_hit > average) + average = (ra->currnt_wnd_hit + ra->average + 1) / 2; + + ra->next_size = min(average , (unsigned long)max); } ra->start = offset; ra->size = ra->next_size; @@ -501,8 +500,8 @@ do_io: * random. Hence don't bother to readahead. 
*/ average = ra->average; - if (ra->serial_cnt > average) - average = (ra->serial_cnt + ra->average + 1) / 2; + if (ra->currnt_wnd_hit > average) + average = (ra->currnt_wnd_hit + ra->average + 1) / 2; if (average > max) { ra->ahead_start = ra->start + ra->size; _