author	Andrew Morton <akpm@osdl.org>	2005-01-07 22:03:18 -0800
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-01-07 22:03:18 -0800
commit	918798e7b49f275970304c17b8449de4b728ec38 (patch)
tree	511fb330b9506f37dd1e74552fc4fc9f001e05fa /mm
parent	ba1f08f14b523e1722ee423eb729663e3fa5b192 (diff)
[PATCH] invalidate_inode_pages2() mmap coherency fix
- When invalidating pages, take care to shoot down any ptes which map them
  as well.

  This ensures that the next mmap access to the page will generate a major
  fault, so NFS's server-side modifications are picked up.

  This also allows us to call invalidate_complete_page() on all pages, so
  filesystems such as ext3 get a chance to invalidate the buffer_heads.

- Don't mark in-pagetable pages as non-uptodate any more.  That broke a
  previous guarantee that mapped-into-user-process pages are always uptodate.

- Check the return value of invalidate_complete_page().  It can fail if
  someone redirties a page after generic_file_direct_IO() writes it back.

But we still have a problem.  If invalidate_inode_pages2() calls
unmap_mapping_range(), that can cause zap_pte_range() to dirty the pagecache
pages.  That will redirty the page's buffers and will cause
invalidate_complete_page() to fail.

So, in generic_file_direct_IO() we do a complete pte shootdown on the file
up-front, prior to writing back dirty pagecache.

This is only done for O_DIRECT writes.  It _could_ be done for O_DIRECT reads
too, providing full mmap-vs-direct-IO coherency for both O_DIRECT reads and
O_DIRECT writes, but permitting the pte shootdown on O_DIRECT reads trivially
allows people to nuke other people's mapped pagecache.

NFS also uses invalidate_inode_pages2() for handling server-side modification
notifications.  But in the NFS case the clear_page_dirty() in
invalidate_inode_pages2() is sufficient, because NFS doesn't have to worry
about the "dirty buffers against a clean page" problem. (I think)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
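[Editor's illustration] To make the coherency scenario concrete, here is a
minimal userspace sketch of the case this patch addresses. It is not part of
the patch: the path, the 4096-byte page-size assumption, and O_DIRECT support
in the underlying filesystem are all assumptions.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical test path; assumes the fs supports O_DIRECT. */
		int fd = open("/tmp/dio-coherency", O_RDWR | O_CREAT | O_DIRECT, 0644);
		if (fd < 0) { perror("open"); return 1; }
		if (ftruncate(fd, 4096)) { perror("ftruncate"); return 1; }

		/* Fault the first page in through a shared mapping. */
		char *map = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
		if (map == MAP_FAILED) { perror("mmap"); return 1; }
		char stale = map[0];

		/* O_DIRECT I/O wants a suitably aligned buffer. */
		void *buf;
		if (posix_memalign(&buf, 4096, 4096)) return 1;
		memset(buf, 'X', 4096);
		if (pwrite(fd, buf, 4096, 0) != 4096) { perror("pwrite"); return 1; }

		/*
		 * With this patch the write path shot down the pte and
		 * invalidated the cached page, so this access takes a major
		 * fault and sees 'X' rather than the stale mapped contents.
		 */
		printf("before: %d  after: %c\n", stale, map[0]);

		munmap(map, 4096);
		free(buf);
		close(fd);
		return 0;
	}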
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap.c	18
-rw-r--r--	mm/truncate.c	67
2 files changed, 58 insertions(+), 27 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 57e39e6a6b3a58..4bfd5aafd90e0e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2247,7 +2247,8 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
EXPORT_SYMBOL(generic_file_writev);
/*
- * Called under i_sem for writes to S_ISREG files
+ * Called under i_sem for writes to S_ISREG files. Returns -EIO if something
+ * went wrong during pagecache shootdown.
*/
ssize_t
generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
@@ -2257,12 +2258,23 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct address_space *mapping = file->f_mapping;
ssize_t retval;
+ /*
+ * If it's a write, unmap all mmappings of the file up-front. This
+ * will cause any pte dirty bits to be propagated into the pageframes
+ * for the subsequent filemap_write_and_wait().
+ */
+ if (rw == WRITE && mapping_mapped(mapping))
+ unmap_mapping_range(mapping, 0, -1, 0);
+
retval = filemap_write_and_wait(mapping);
if (retval == 0) {
retval = mapping->a_ops->direct_IO(rw, iocb, iov,
offset, nr_segs);
- if (rw == WRITE && mapping->nrpages)
- invalidate_inode_pages2(mapping);
+ if (rw == WRITE && mapping->nrpages) {
+ int err = invalidate_inode_pages2(mapping);
+ if (err)
+ retval = err;
+ }
}
return retval;
}
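[Editor's illustration] The user-visible consequence of this hunk is that an
O_DIRECT write can now return -EIO when the post-write pagecache invalidation
fails (for example because a concurrent mmap store redirtied a cached page).
A hedged sketch of a caller checking for that; the path is hypothetical:

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/dio-eio", O_RDWR | O_CREAT | O_DIRECT, 0644);
		if (fd < 0) { perror("open"); return 1; }

		void *buf;
		if (posix_memalign(&buf, 4096, 4096)) return 1;
		memset(buf, 0, 4096);

		if (pwrite(fd, buf, 4096, 0) < 0) {
			/*
			 * After this patch, EIO here can mean the pagecache
			 * shootdown failed, not necessarily a media error.
			 */
			if (errno == EIO)
				fprintf(stderr, "pagecache invalidation failed "
					"after O_DIRECT write\n");
			else
				perror("pwrite");
		}

		free(buf);
		close(fd);
		return 0;
	}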
diff --git a/mm/truncate.c b/mm/truncate.c
index b18ec4c41ae5d6..a1b4a442d90dce 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -65,6 +65,8 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
* be marked dirty at any time too. So we re-check the dirtiness inside
* ->tree_lock. That provides exclusion against the __set_page_dirty
* functions.
+ *
+ * Returns non-zero if the page was successfully invalidated.
*/
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
@@ -240,50 +242,67 @@ unsigned long invalidate_inode_pages(struct address_space *mapping)
EXPORT_SYMBOL(invalidate_inode_pages);
/**
- * invalidate_inode_pages2 - remove all unmapped pages from an address_space
+ * invalidate_inode_pages2 - remove all pages from an address_space
* @mapping - the address_space
*
- * invalidate_inode_pages2() is like truncate_inode_pages(), except for the case
- * where the page is seen to be mapped into process pagetables. In that case,
- * the page is marked clean but is left attached to its address_space.
- *
- * The page is also marked not uptodate so that a subsequent pagefault will
- * perform I/O to bring the page's contents back into sync with its backing
- * store.
+ * Any pages which are found to be mapped into pagetables are unmapped prior to
+ * invalidation.
*
- * FIXME: invalidate_inode_pages2() is probably trivially livelockable.
+ * Returns -EIO if any pages could not be invalidated.
*/
-void invalidate_inode_pages2(struct address_space *mapping)
+int invalidate_inode_pages2(struct address_space *mapping)
{
struct pagevec pvec;
pgoff_t next = 0;
int i;
+ int ret = 0;
+ int did_full_unmap = 0;
pagevec_init(&pvec, 0);
- while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
+ while (!ret && pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
+ int was_dirty;
lock_page(page);
- if (page->mapping == mapping) { /* truncate race? */
- wait_on_page_writeback(page);
- next = page->index + 1;
- if (page_mapped(page)) {
- clear_page_dirty(page);
- ClearPageUptodate(page);
+ if (page->mapping != mapping) { /* truncate race? */
+ unlock_page(page);
+ continue;
+ }
+ wait_on_page_writeback(page);
+ next = page->index + 1;
+ while (page_mapped(page)) {
+ if (!did_full_unmap) {
+ /*
+ * Zap the rest of the file in one hit.
+ * FIXME: invalidate_inode_pages2()
+ * should take start/end offsets.
+ */
+ unmap_mapping_range(mapping,
+ page->index << PAGE_CACHE_SHIFT,
+ -1, 0);
+ did_full_unmap = 1;
} else {
- if (!invalidate_complete_page(mapping,
- page)) {
- clear_page_dirty(page);
- ClearPageUptodate(page);
- }
+ /*
+ * Just zap this page
+ */
+ unmap_mapping_range(mapping,
+ page->index << PAGE_CACHE_SHIFT,
+ (page->index << PAGE_CACHE_SHIFT)+1,
+ 0);
}
}
+ was_dirty = test_clear_page_dirty(page);
+ if (!invalidate_complete_page(mapping, page)) {
+ if (was_dirty)
+ set_page_dirty(page);
+ ret = -EIO;
+ }
unlock_page(page);
}
pagevec_release(&pvec);
cond_resched();
}
+ return ret;
}
-
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
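[Editor's illustration] The heart of the new loop is the sample-and-restore
dance around the dirty bit: test_clear_page_dirty() before the invalidation
attempt, set_page_dirty() again if the attempt fails, so no dirtiness is lost
when -EIO is reported. A standalone model of that pattern; every type and
helper below is a hypothetical stand-in for the kernel ones:

	#include <errno.h>
	#include <stdio.h>

	struct fake_page {
		int dirty;
		int has_private;	/* models redirtied buffer_heads */
	};

	/* Clears the dirty bit and returns its old value, like
	 * test_clear_page_dirty(). */
	static int fake_test_clear_dirty(struct fake_page *p)
	{
		int was = p->dirty;

		p->dirty = 0;
		return was;
	}

	/* Non-zero on success, like invalidate_complete_page(). */
	static int fake_invalidate_complete(struct fake_page *p)
	{
		return !p->dirty && !p->has_private;
	}

	static int fake_invalidate_one(struct fake_page *p)
	{
		int was_dirty = fake_test_clear_dirty(p);

		if (!fake_invalidate_complete(p)) {
			if (was_dirty)
				p->dirty = 1;	/* restore: dirtiness not lost */
			return -EIO;
		}
		return 0;
	}

	int main(void)
	{
		struct fake_page clean = { 0, 0 };
		struct fake_page busy = { 1, 1 };

		printf("clean: ret=%d\n", fake_invalidate_one(&clean));
		printf("busy:  ret=%d still_dirty=%d\n",
		       fake_invalidate_one(&busy), busy.dirty);
		return 0;
	}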