diff options
author | Andrew Morton <akpm@osdl.org> | 2005-01-07 22:03:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-01-07 22:03:18 -0800 |
commit | 918798e7b49f275970304c17b8449de4b728ec38 (patch) | |
tree | 511fb330b9506f37dd1e74552fc4fc9f001e05fa /mm | |
parent | ba1f08f14b523e1722ee423eb729663e3fa5b192 (diff) | |
download | history-918798e7b49f275970304c17b8449de4b728ec38.tar.gz |
[PATCH] invalidate_inode_pages2() mmap coherency fix
- When invalidating pages, take care to shoot down any ptes which map them
as well.
This ensures that the next mmap access to the page will generate a major
fault, so NFS's server-side modifications are picked up.
This also allows us to call invalidate_complete_page() on all pages, so
filesystems such as ext3 get a chance to invalidate the buffer_heads.
- Don't mark in-pagetable pages as non-uptodate any more. That broke a
previous guarantee that mapped-into-user-process pages are always uptodate.
- Check the return value of invalidate_complete_page(). It can fail if
someone redirties a page after generic_file_direct_IO() writes it back.
But we still have a problem. If invalidate_inode_pages2() calls
unmap_mapping_range(), that can cause zap_pte_range() to dirty the pagecache
pages. That will redirty the page's buffers and will cause
invalidate_complete_page() to fail.
So, in generic_file_direct_IO() we do a complete pte shootdown on the file
up-front, prior to writing back dirty pagecache. This is only done for
O_DIRECT writes. It _could_ be done for O_DIRECT reads too, providing full
mmap-vs-direct-IO coherency for both O_DIRECT reads and O_DIRECT writes, but
permitting the pte shootdown on O_DIRECT reads trivially allows people to nuke
other people's mapped pagecache.
NFS also uses invalidate_inode_pages2() for handling server-side modification
notifications. But in the NFS case the clear_page_dirty() in
invalidate_inode_pages2() is sufficient, because NFS doesn't have to worry
about the "dirty buffers against a clean page" problem. (I think)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 18 | ||||
-rw-r--r-- | mm/truncate.c | 67 |
2 files changed, 58 insertions, 27 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 57e39e6a6b3a58..4bfd5aafd90e0e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2247,7 +2247,8 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov, EXPORT_SYMBOL(generic_file_writev); /* - * Called under i_sem for writes to S_ISREG files + * Called under i_sem for writes to S_ISREG files. Returns -EIO if something + * went wrong during pagecache shootdown. */ ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, @@ -2257,12 +2258,23 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct address_space *mapping = file->f_mapping; ssize_t retval; + /* + * If it's a write, unmap all mmappings of the file up-front. This + * will cause any pte dirty bits to be propagated into the pageframes + * for the subsequent filemap_write_and_wait(). + */ + if (rw == WRITE && mapping_mapped(mapping)) + unmap_mapping_range(mapping, 0, -1, 0); + retval = filemap_write_and_wait(mapping); if (retval == 0) { retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs); - if (rw == WRITE && mapping->nrpages) - invalidate_inode_pages2(mapping); + if (rw == WRITE && mapping->nrpages) { + int err = invalidate_inode_pages2(mapping); + if (err) + retval = err; + } } return retval; } diff --git a/mm/truncate.c b/mm/truncate.c index b18ec4c41ae5d6..a1b4a442d90dce 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -65,6 +65,8 @@ truncate_complete_page(struct address_space *mapping, struct page *page) * be marked dirty at any time too. So we re-check the dirtiness inside * ->tree_lock. That provides exclusion against the __set_page_dirty * functions. + * + * Returns non-zero if the page was successfully invalidated. 
*/ static int invalidate_complete_page(struct address_space *mapping, struct page *page) @@ -240,50 +242,67 @@ unsigned long invalidate_inode_pages(struct address_space *mapping) EXPORT_SYMBOL(invalidate_inode_pages); /** - * invalidate_inode_pages2 - remove all unmapped pages from an address_space + * invalidate_inode_pages2 - remove all pages from an address_space * @mapping - the address_space * - * invalidate_inode_pages2() is like truncate_inode_pages(), except for the case - * where the page is seen to be mapped into process pagetables. In that case, - * the page is marked clean but is left attached to its address_space. - * - * The page is also marked not uptodate so that a subsequent pagefault will - * perform I/O to bringthe page's contents back into sync with its backing - * store. + * Any pages which are found to be mapped into pagetables are unmapped prior to + * invalidation. * - * FIXME: invalidate_inode_pages2() is probably trivially livelockable. + * Returns -EIO if any pages could not be invalidated. */ -void invalidate_inode_pages2(struct address_space *mapping) +int invalidate_inode_pages2(struct address_space *mapping) { struct pagevec pvec; pgoff_t next = 0; int i; + int ret = 0; + int did_full_unmap = 0; pagevec_init(&pvec, 0); - while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { - for (i = 0; i < pagevec_count(&pvec); i++) { + while (!ret && pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + for (i = 0; !ret && i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; + int was_dirty; lock_page(page); - if (page->mapping == mapping) { /* truncate race? */ - wait_on_page_writeback(page); - next = page->index + 1; - if (page_mapped(page)) { - clear_page_dirty(page); - ClearPageUptodate(page); + if (page->mapping != mapping) { /* truncate race? 
*/ + unlock_page(page); + continue; + } + wait_on_page_writeback(page); + next = page->index + 1; + while (page_mapped(page)) { + if (!did_full_unmap) { + /* + * Zap the rest of the file in one hit. + * FIXME: invalidate_inode_pages2() + * should take start/end offsets. + */ + unmap_mapping_range(mapping, + page->index << PAGE_CACHE_SHIFT, + -1, 0); + did_full_unmap = 1; } else { - if (!invalidate_complete_page(mapping, - page)) { - clear_page_dirty(page); - ClearPageUptodate(page); - } + /* + * Just zap this page + */ + unmap_mapping_range(mapping, + page->index << PAGE_CACHE_SHIFT, + (page->index << PAGE_CACHE_SHIFT)+1, + 0); } } + was_dirty = test_clear_page_dirty(page); + if (!invalidate_complete_page(mapping, page)) { + if (was_dirty) + set_page_dirty(page); + ret = -EIO; + } unlock_page(page); } pagevec_release(&pvec); cond_resched(); } + return ret; } - EXPORT_SYMBOL_GPL(invalidate_inode_pages2); |