author     OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>    2014-04-27 15:36:11 +0900
committer  Daniel Phillips <daniel@tux3.org>               2014-04-27 15:36:11 +0900
commit     a5bb7eae2af5ef12fa5ae63db78341ff195e345b (patch)
tree       4e8a538c19eafb08f7c8761b061ed668d156a616
parent     0ffba0cbd35ccb13e1961e006eb6190a44f2c5d6 (diff)
download   linux-tux3-a5bb7eae2af5ef12fa5ae63db78341ff195e345b.tar.gz
tux3: Support mmap write: Fix race of mmap write with write(2) on delta boundary
Clear writable to protect oldpage from the following mmap write race:

    cpu0                     cpu1                     cpu2
                             [mmap write]
                             mmap write(old)
                             page fault
    [backend]                dirty old
    delta++
    [page_fault]
    page fork [*A]
                                                      mmap write(old)
                                                      no page fault
    copy_page(new, old)                               modify page
    replace_pte(new, old)
    flusher
    page_mkclean(old)

There is a delay between delta++ and page_mkclean() for I/O. So, while
cpu0 is copying the page data during page fork, another cpu (cpu2) can
change the data on the same page. If this race happens, the new and old
pages can end up with different data.

To fix this race, make the PTE read-only before starting the page fork,
at the [*A] place above.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
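As a hedged illustration of the ordering this patch establishes (not tux3's
actual code: fork_page_sketch() and the GFP_NOFS allocation are invented for
the sketch; the real path is pagefork_for_blockdirty() in the diff below),
the key point is that page_mkclean() runs before the copy, so no CPU retains
a writable PTE to the old page while it is being cloned:

#include <linux/mm.h>
#include <linux/highmem.h>	/* copy_highpage() */
#include <linux/rmap.h>		/* page_mkclean() */

/*
 * Illustrative sketch only.  The caller is assumed to hold the page
 * lock on oldpage, as pagefork_for_blockdirty() does.
 */
static struct page *fork_page_sketch(struct page *oldpage)
{
	struct page *newpage;

	/* [*A] Write-protect every PTE mapping oldpage.  From this point,
	 * any mmap write to oldpage must take a page fault. */
	if (PageDirty(oldpage))
		page_mkclean(oldpage);

	newpage = alloc_page(GFP_NOFS);
	if (!newpage)
		return NULL;

	/* Safe copy: no CPU can modify oldpage through a writable PTE,
	 * so newpage and oldpage cannot diverge during the copy. */
	copy_highpage(newpage, oldpage);

	/* The real function then replaces oldpage with newpage on the
	 * radix tree and in the PTEs (replace_pte in the diagram above). */
	return newpage;
}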
-rw-r--r--  fs/tux3/buffer_fork.c  39
1 file changed, 39 insertions, 0 deletions
diff --git a/fs/tux3/buffer_fork.c b/fs/tux3/buffer_fork.c
index fd3508e0f1b1d4..c1530c2e95def7 100644
--- a/fs/tux3/buffer_fork.c
+++ b/fs/tux3/buffer_fork.c
@@ -198,6 +198,42 @@ void free_forked_buffers(struct sb *sb, struct inode *inode, int force)
#include "mmap_builtin_hack.h"
/*
+ * Clear writable to protect oldpage from following mmap write race.
+ *
+ * cpu0                     cpu1                     cpu2
+ *                          [mmap write]
+ *                          mmap write(old)
+ *                          page fault
+ * [backend]                dirty old
+ * delta++
+ * [page_fault]
+ * page fork
+ *                                                   mmap write(old)
+ *                                                   no page fault
+ * copy_page(new, old)                               modify page
+ * replace_pte(new, old)
+ * flusher
+ * page_mkclean(old)
+ *
+ * There is a delay between delta++ and page_mkclean() for I/O. So,
+ * while cpu0 is copying the page data during page fork, another cpu
+ * (cpu2) can change the data on the same page. If this race happens,
+ * the new and old pages can end up with different data.
+ */
+static void prepare_clone_page(struct page *page)
+{
+ assert(PageLocked(page));
+
+ /*
+ * If the backend flusher has not yet cleared the dirty flag (i.e.,
+ * has not yet called page_mkclean()) for I/O, call it here instead,
+ * to prevent the above race.
+ */
+ if (PageDirty(page))
+ page_mkclean(page);
+}
+
+/*
* This replaces the oldpage on radix-tree with newpage atomically.
*
* Similar to migrate_pages(), but the oldpage is for writeout.
@@ -601,6 +637,9 @@ struct page *pagefork_for_blockdirty(struct page *oldpage, unsigned newdelta)
goto out;
}
+ /* Clear writable to protect oldpage from mmap write race */
+ prepare_clone_page(oldpage);
+
/*
* We need to buffer fork. Start to clone the oldpage.
*/
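For completeness, a hypothetical sketch of the fault side, assuming a
conventional ->page_mkwrite callback (tux3's real mmap hooks sit behind
mmap_builtin_hack.h and may differ): once the PTE has been write-protected,
a subsequent mmap write takes a fault and serializes on the page lock that
pagefork_for_blockdirty() holds, instead of writing through a stale writable
mapping.

#include <linux/mm.h>

/* Hypothetical fault handler, for illustration only (not tux3's code). */
static int sketch_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* Blocks while pagefork_for_blockdirty() still holds the page lock. */
	lock_page(page);

	/* Real code would verify the page is still mapped, re-dirty it,
	 * and fork it again if a new delta has started. */

	/* Returning VM_FAULT_LOCKED tells the fault path the page is locked. */
	return VM_FAULT_LOCKED;
}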