From: Nick Piggin do_generic_mapping_read() { isize1 = i_size_read(); ... readpage copy_to_user up to isize1; } readpage() { isize2 = i_size_read(); ... read blocks ... zero-fill all blocks past isize2 } If a second thread runs truncate and shrinks i_size, so isize1 and isize2 are different, the read can return up to a page of zero-fill that shouldn't really exist. The trick is to read isize1 after doing the readpage. I realised this is the right way to do it without having to change the readpage API. The patch should not cost any cycles when reading from pagecache. Signed-off-by: Andrew Morton --- 25-akpm/mm/filemap.c | 76 +++++++++++++++++++++++++++++++++------------------ 1 files changed, 50 insertions(+), 26 deletions(-) diff -puN mm/filemap.c~read-vs-truncate-race mm/filemap.c --- 25/mm/filemap.c~read-vs-truncate-race Tue May 25 16:03:56 2004 +++ 25-akpm/mm/filemap.c Tue May 25 16:04:25 2004 @@ -650,7 +650,8 @@ void do_generic_mapping_read(struct addr read_actor_t actor) { struct inode *inode = mapping->host; - unsigned long index, offset; + unsigned long index, end_index, offset; + loff_t isize; struct page *cached_page; int error; struct file_ra_state ra = *_ra; @@ -659,26 +660,18 @@ void do_generic_mapping_read(struct addr index = *ppos >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; + isize = i_size_read(inode); + end_index = isize >> PAGE_CACHE_SHIFT; + if (index > end_index) + goto out; + for (;;) { struct page *page; - unsigned long end_index, nr, ret; - loff_t isize = i_size_read(inode); - - end_index = isize >> PAGE_CACHE_SHIFT; - - if (index > end_index) - break; - nr = PAGE_CACHE_SIZE; - if (index == end_index) { - nr = isize & ~PAGE_CACHE_MASK; - if (nr <= offset) - break; - } + unsigned long nr, ret; cond_resched(); page_cache_readahead(mapping, &ra, filp, index); - nr = nr - offset; find_page: page = find_get_page(mapping, index); if (unlikely(page == NULL)) { @@ -688,6 +681,17 @@ find_page: if (!PageUptodate(page)) goto page_not_up_to_date; page_ok: + /* nr is the maximum number of bytes to copy from this page */ + nr = PAGE_CACHE_SIZE; + if (index == end_index) { + nr = isize & ~PAGE_CACHE_MASK; + if (nr <= offset) { + page_cache_release(page); + goto out; + } + } + nr = nr - offset; + /* If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing * before reading the page on the kernel side. @@ -719,7 +723,7 @@ page_ok: page_cache_release(page); if (ret == nr && desc->count) continue; - break; + goto out; page_not_up_to_date: /* Get exclusive access to the page ... */ @@ -739,22 +743,41 @@ page_not_up_to_date: } readpage: - /* ... and start the actual read. The read will unlock the page. */ + /* Start the actual read. The read will unlock the page. */ error = mapping->a_ops->readpage(filp, page); - if (!error) { - if (PageUptodate(page)) - goto page_ok; + if (unlikely(error)) + goto readpage_error; + + if (!PageUptodate(page)) { wait_on_page_locked(page); - if (PageUptodate(page)) - goto page_ok; - error = -EIO; + if (!PageUptodate(page)) { + error = -EIO; + goto readpage_error; + } + } + + /* + * i_size must be checked after we have done ->readpage. + * + * Checking i_size after the readpage allows us to calculate + * the correct value for "nr", which means the zero-filled + * part of the page is not copied back to userspace (unless + * another truncate extends the file - this is desired though). + */ + isize = i_size_read(inode); + end_index = isize >> PAGE_CACHE_SHIFT; + if (index > end_index) { + page_cache_release(page); + goto out; } + goto page_ok; +readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ desc->error = error; page_cache_release(page); - break; + goto out; no_cached_page: /* @@ -765,7 +788,7 @@ no_cached_page: cached_page = page_cache_alloc_cold(mapping); if (!cached_page) { desc->error = -ENOMEM; - break; + goto out; } } error = add_to_page_cache_lru(cached_page, mapping, @@ -774,13 +797,14 @@ no_cached_page: if (error == -EEXIST) goto find_page; desc->error = error; - break; + goto out; } page = cached_page; cached_page = NULL; goto readpage; } +out: *_ra = ra; *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; _