aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>2013-02-11 15:40:41 +0900
committerDaniel Phillips <daniel@tux3.org>2013-02-11 15:40:41 +0900
commit46de4a7f9511547dfb8d4b1f6506cdae9a9d6d2b (patch)
treee458cf586ebabd519fd136cb39061640268519ad
parentcc7fe09830b7b4c51bf4a99211b9828db75e36a3 (diff)
downloadlinux-tux3-46de4a7f9511547dfb8d4b1f6506cdae9a9d6d2b.tar.gz
tux3: Fix race of free_forked_buffers()
If inode was evicted, it calls free_forked_buffers() to make sure all forked buffers related to inode was freed. So, free_forked_buffers() is called arbitrary timing of inode reclaimer (this is also true if we had multiple background deltas). So, we have to make sure there is no race with free_forked_buffers(), forked buffer, and I/O path. Now, we have race between free_forked_buffers() and bufvec_prepare_and_lock_page() free_forked_buffers() bufvec_prepare_and_lock_page() TestClearPageDirty() PageDirty() PageWriteback() /* thinks I/O was done */ TestSetPageWriteback() With above race, free_forked_buffers() thinks page is not under I/O path. But, is_freeable_forked() and buffer_busy() are assuming it is called only if I/O for page was already done. This fixes race by setting writeback flag before dirty. FIXME: we would want to use refcount to free forked page somehow. Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r--fs/tux3/buffer_fork.c25
-rw-r--r--fs/tux3/buffer_writeback.c24
-rw-r--r--fs/tux3/buffer_writebacklib.c12
3 files changed, 44 insertions, 17 deletions
diff --git a/fs/tux3/buffer_fork.c b/fs/tux3/buffer_fork.c
index 4d578337214ff2..9892ad77e77358 100644
--- a/fs/tux3/buffer_fork.c
+++ b/fs/tux3/buffer_fork.c
@@ -75,6 +75,10 @@ static void free_forked_page(struct page *page)
static inline int buffer_busy(struct buffer_head *buffer, int refcount)
{
+ /*
+ * Page didn't have dirty and writeback, so this buffer should
+ * already be flushed. Check if reader is still using this.
+ */
assert(!buffer_dirty(buffer));
assert(!buffer_async_write(buffer));
assert(!buffer_async_read(buffer));
@@ -144,7 +148,26 @@ void free_forked_buffers(struct sb *sb, struct inode *inode, int umount)
assert(!PageDirty(page));
#endif
assert(!force || (!PageDirty(page) && !PageWriteback(page)));
- /* I/O was submitted, and I/O was done? */
+
+ /*
+ * I/O was submitted and I/O was done?
+ *
+ * NOTE: order of checking flags is important.
+ *
+ * free_forked_buffers bufvec_prepare_and_lock_page
+ * PageWriteback()
+ * TestSetPageWriteback()
+ * TestClearPageDirty()
+ * PageDirty()
+ * [missed both flags]
+ *
+ * Above order has race. So, we have to check "dirty"
+ * at first, then check "writeback".
+ *
+ * FIXME: we would not want to depend on this fragile
+ * way, and would want to use refcount simply to free
+ * forked page.
+ */
if (!PageDirty(page) && !PageWriteback(page)) {
/* All users were gone or force=1? */
if (force || is_freeable_forked(buffer, page)) {
diff --git a/fs/tux3/buffer_writeback.c b/fs/tux3/buffer_writeback.c
index 3f3fa134c6fbd0..af0f64af8a76e0 100644
--- a/fs/tux3/buffer_writeback.c
+++ b/fs/tux3/buffer_writeback.c
@@ -195,13 +195,24 @@ bufvec_prepare_and_lock_page(struct bufvec *bufvec, struct page *page)
struct tux3_iattr_data *idata = bufvec->idata;
pgoff_t last_index;
unsigned offset;
- int old_flag;
+ int old_flag, old_writeback;
lock_page(page);
assert(PageDirty(page));
assert(!PageWriteback(page));
/*
+ * Set "writeback" flag before clearing "dirty" flag, so, page
+ * presents either of "dirty" or "writeback" flag. With this,
+ * free_forked_buffers() can check page flags without locking
+ * page. See FIXME of forked_buffers().
+ *
+ * And writeback flag prevents vmscan releases page.
+ */
+ old_writeback = TestSetPageWriteback(page);
+ assert(!old_writeback);
+
+ /*
* NOTE: This has the race if there is concurrent mark
* dirty. But we shouldn't use concurrent dirty [B] on volmap.
*
@@ -218,14 +229,13 @@ bufvec_prepare_and_lock_page(struct bufvec *bufvec, struct page *page)
}
/*
- * set_page_writeback() is assuming to be called after clearing dirty.
- * This is under lock_page, so, buffer fork will see the dirty
- * or writeback are presented.
+ * This fixes incoherency of page accounting and radix-tree
+ * tag by above change of dirty and writeback.
*
- * And writeback flag prevents vmscan releases page.
+ * NOTE: This is assuming to be called after clearing dirty
+ * (See comment of tux3_clear_page_dirty_for_io()).
*/
- old_flag = tux3_test_set_page_writeback(page);
- assert(!old_flag);
+ __tux3_test_set_page_writeback(page, old_writeback);
/*
* Zero fill the page for mmap outside i_size after clear dirty.
diff --git a/fs/tux3/buffer_writebacklib.c b/fs/tux3/buffer_writebacklib.c
index 7fba6c10120345..2274fa99e6f64c 100644
--- a/fs/tux3/buffer_writebacklib.c
+++ b/fs/tux3/buffer_writebacklib.c
@@ -76,18 +76,16 @@ static int tux3_clear_page_dirty_for_io(struct page *page)
return TestClearPageDirty(page);
}
-static int tux3_test_set_page_writeback(struct page *page)
+static void __tux3_test_set_page_writeback(struct page *page, int old_writeback)
{
struct address_space *mapping = page->mapping;
- int ret;
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
- ret = TestSetPageWriteback(page);
- if (!ret) {
+ if (!old_writeback) {
/* If PageForked(), don't touch tag */
if (!PageForked(page))
radix_tree_tag_set(&mapping->page_tree,
@@ -105,11 +103,7 @@ static int tux3_test_set_page_writeback(struct page *page)
page_index(page),
PAGECACHE_TAG_TOWRITE);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
- } else {
- ret = TestSetPageWriteback(page);
}
- if (!ret)
+ if (!old_writeback)
account_page_writeback(page);
- return ret;
-
}