diff options
author | OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> | 2013-02-11 15:40:41 +0900 |
---|---|---|
committer | Daniel Phillips <daniel@tux3.org> | 2013-02-11 15:40:41 +0900 |
commit | 46de4a7f9511547dfb8d4b1f6506cdae9a9d6d2b (patch) | |
tree | e458cf586ebabd519fd136cb39061640268519ad | |
parent | cc7fe09830b7b4c51bf4a99211b9828db75e36a3 (diff) | |
download | linux-tux3-46de4a7f9511547dfb8d4b1f6506cdae9a9d6d2b.tar.gz |
tux3: Fix race of free_forked_buffers()
If inode was evicted, it calls free_forked_buffers() to make sure all
forked buffers related to inode was freed. So, free_forked_buffers()
is called arbitrary timing of inode reclaimer (this is also true if we
had multiple background deltas).
So, we have to make sure there is no race with free_forked_buffers(),
forked buffer, and I/O path.
Now, we have race between free_forked_buffers() and
bufvec_prepare_and_lock_page()
free_forked_buffers() bufvec_prepare_and_lock_page()
TestClearPageDirty()
PageDirty()
PageWriteback()
/* thinks I/O was done */
TestSetPageWriteback()
With above race, free_forked_buffers() thinks page is not under I/O
path. But, is_freeable_forked() and buffer_busy() are assuming it is
called only if I/O for page was already done.
This fixes race by setting writeback flag before dirty.
FIXME: we would want to use refcount to free forked page somehow.
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r-- | fs/tux3/buffer_fork.c | 25 | ||||
-rw-r--r-- | fs/tux3/buffer_writeback.c | 24 | ||||
-rw-r--r-- | fs/tux3/buffer_writebacklib.c | 12 |
3 files changed, 44 insertions, 17 deletions
diff --git a/fs/tux3/buffer_fork.c b/fs/tux3/buffer_fork.c index 4d578337214ff2..9892ad77e77358 100644 --- a/fs/tux3/buffer_fork.c +++ b/fs/tux3/buffer_fork.c @@ -75,6 +75,10 @@ static void free_forked_page(struct page *page) static inline int buffer_busy(struct buffer_head *buffer, int refcount) { + /* + * Page didn't have dirty and writeback, so this buffer should + * already be flushed. Check if reader is still using this. + */ assert(!buffer_dirty(buffer)); assert(!buffer_async_write(buffer)); assert(!buffer_async_read(buffer)); @@ -144,7 +148,26 @@ void free_forked_buffers(struct sb *sb, struct inode *inode, int umount) assert(!PageDirty(page)); #endif assert(!force || (!PageDirty(page) && !PageWriteback(page))); - /* I/O was submitted, and I/O was done? */ + + /* + * I/O was submitted and I/O was done? + * + * NOTE: order of checking flags is important. + * + * free_forked_buffers bufvec_prepare_and_lock_page + * PageWriteback() + * TestSetPageWriteback() + * TestClearPageDirty() + * PageDirty() + * [missed both flags] + * + * Above order has race. So, we have to check "dirty" + * at first, then check "writeback". + * + * FIXME: we would not want to depend on this fragile + * way, and would want to use refcount simply to free + * forked page. + */ if (!PageDirty(page) && !PageWriteback(page)) { /* All users were gone or force=1? */ if (force || is_freeable_forked(buffer, page)) { diff --git a/fs/tux3/buffer_writeback.c b/fs/tux3/buffer_writeback.c index 3f3fa134c6fbd0..af0f64af8a76e0 100644 --- a/fs/tux3/buffer_writeback.c +++ b/fs/tux3/buffer_writeback.c @@ -195,13 +195,24 @@ bufvec_prepare_and_lock_page(struct bufvec *bufvec, struct page *page) struct tux3_iattr_data *idata = bufvec->idata; pgoff_t last_index; unsigned offset; - int old_flag; + int old_flag, old_writeback; lock_page(page); assert(PageDirty(page)); assert(!PageWriteback(page)); /* + * Set "writeback" flag before clearing "dirty" flag, so, page + * presents either of "dirty" or "writeback" flag. With this, + * free_forked_buffers() can check page flags without locking + * page. See FIXME of forked_buffers(). + * + * And writeback flag prevents vmscan releases page. + */ + old_writeback = TestSetPageWriteback(page); + assert(!old_writeback); + + /* * NOTE: This has the race if there is concurrent mark * dirty. But we shouldn't use concurrent dirty [B] on volmap. * @@ -218,14 +229,13 @@ bufvec_prepare_and_lock_page(struct bufvec *bufvec, struct page *page) } /* - * set_page_writeback() is assuming to be called after clearing dirty. - * This is under lock_page, so, buffer fork will see the dirty - * or writeback are presented. + * This fixes incoherency of page accounting and radix-tree + * tag by above change of dirty and writeback. * - * And writeback flag prevents vmscan releases page. + * NOTE: This is assuming to be called after clearing dirty + * (See comment of tux3_clear_page_dirty_for_io()). */ - old_flag = tux3_test_set_page_writeback(page); - assert(!old_flag); + __tux3_test_set_page_writeback(page, old_writeback); /* * Zero fill the page for mmap outside i_size after clear dirty. diff --git a/fs/tux3/buffer_writebacklib.c b/fs/tux3/buffer_writebacklib.c index 7fba6c10120345..2274fa99e6f64c 100644 --- a/fs/tux3/buffer_writebacklib.c +++ b/fs/tux3/buffer_writebacklib.c @@ -76,18 +76,16 @@ static int tux3_clear_page_dirty_for_io(struct page *page) return TestClearPageDirty(page); } -static int tux3_test_set_page_writeback(struct page *page) +static void __tux3_test_set_page_writeback(struct page *page, int old_writeback) { struct address_space *mapping = page->mapping; - int ret; if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); - ret = TestSetPageWriteback(page); - if (!ret) { + if (!old_writeback) { /* If PageForked(), don't touch tag */ if (!PageForked(page)) radix_tree_tag_set(&mapping->page_tree, @@ -105,11 +103,7 @@ static int tux3_test_set_page_writeback(struct page *page) page_index(page), PAGECACHE_TAG_TOWRITE); spin_unlock_irqrestore(&mapping->tree_lock, flags); - } else { - ret = TestSetPageWriteback(page); } - if (!ret) + if (!old_writeback) account_page_writeback(page); - return ret; - } |