aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> 2014-05-01 17:22:32 +0900
committer Daniel Phillips <daniel@tux3.org> 2014-05-01 17:22:32 +0900
commit 66e06b3693f5a7c2449ed0f2996a96ea3a30c1e5 (patch)
tree 48326e39e2051ef461f1745dee038d5aa3a8a946
parent c6d82acc178b551dc7af3ed7a7a7a4ec9173b489 (diff)
download linux-tux3-66e06b3693f5a7c2449ed0f2996a96ea3a30c1e5.tar.gz
tux3: Support mmap write: Fix race of mmap write with write(2) for copy_from_user()
mmap write and write(2) can race.

    cpu0                                cpu1
    write(2)
      delta-1 = get_delta()
      remove_suid()
      update_timestamp()
      write_begin()
                                        delta++
                                        mmap write
                                          lock_page()
                                          delta-2 = get_delta()
                                          page-B = pagefork(page-A)
                                          unlock_page()
        get_delta_if_need()
        lock_page()
        page-B = find_get_page();
        pagefork(page-B)
      write_end()
      put_delta()

If mmap write and write(2) run in the above order, cpu0 sees the dirty page-B for delta-2 while write(2) is still using delta-1. We must guarantee that a writer never sees data that belongs to a future delta.

To fix this race, this change (re)takes the delta while holding lock_page(). With this change, we are guaranteed to use the latest delta, which fixes the race.

FIXME: This approach splits update_timestamp()/remove_suid()/etc. and the first modification to the page into separate transactions. It would be better to modify data and metadata in the same delta, so that the application's modification and the on-disk state are more atomic.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r-- fs/tux3/commit.c 6
-rw-r--r-- fs/tux3/filemap.c 13
-rw-r--r-- fs/tux3/filemap_blocklib.c 20
-rw-r--r-- fs/tux3/tux3.h 2
4 files changed, 26 insertions, 15 deletions
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c
index fc7f85e6f912ce..a50919f8c955b6 100644
--- a/fs/tux3/commit.c
+++ b/fs/tux3/commit.c
@@ -768,10 +768,14 @@ int change_end(struct sb *sb)
* }
* change_end_if_need()
*/
-void change_begin_if_needed(struct sb *sb)
+void change_begin_if_needed(struct sb *sb, int need_sep)
{
if (current->journal_info == NULL)
change_begin(sb);
+ else if (need_sep) {
+ change_end(sb);
+ change_begin(sb);
+ }
}
void change_end_if_needed(struct sb *sb)
diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c
index 612ac41e2490bc..c71f1c5f93f2e3 100644
--- a/fs/tux3/filemap.c
+++ b/fs/tux3/filemap.c
@@ -824,12 +824,12 @@ static int __tux3_file_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
- int check_fork)
+ int tux3_flags)
{
int ret;
ret = tux3_write_begin(mapping, pos, len, flags, pagep,
- tux3_da_get_block, check_fork);
+ tux3_da_get_block, tux3_flags);
if (ret < 0)
tux3_write_failed(mapping, pos + len);
return ret;
@@ -854,12 +854,9 @@ static int tux3_file_write_begin(struct file *file,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- /* Separate big write transaction to small chunk. */
- assert(S_ISREG(mapping->host->i_mode));
- change_begin_if_needed(tux_sb(mapping->host->i_sb));
-
return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, 1);
+ fsdata,
+ TUX3_F_PAGEFORK | TUX3_F_SEP_DELTA);
}
static int tux3_file_write_end(struct file *file, struct address_space *mapping,
@@ -989,7 +986,7 @@ static int tux3_symlink_write_begin(struct file *file,
struct page **pagep, void **fsdata)
{
return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, 1);
+ fsdata, TUX3_F_PAGEFORK);
}
/* Copy of tux_file_aops, except ->write_begin/end */
diff --git a/fs/tux3/filemap_blocklib.c b/fs/tux3/filemap_blocklib.c
index f7d78642632748..8b77c9f64ad26d 100644
--- a/fs/tux3/filemap_blocklib.c
+++ b/fs/tux3/filemap_blocklib.c
@@ -152,6 +152,9 @@ static int __tux3_write_begin(struct page *page, loff_t pos, unsigned len,
return err;
}
+#define TUX3_F_PAGEFORK (1 << 0)
+#define TUX3_F_SEP_DELTA (1 << 1)
+
/*
* Copy of block_write_begin()
* (Add to call pagefork_for_blockdirty() for buffer fork)
@@ -159,7 +162,7 @@ static int __tux3_write_begin(struct page *page, loff_t pos, unsigned len,
static int tux3_write_begin(struct address_space *mapping, loff_t pos,
unsigned len, unsigned flags,
struct page **pagep, get_block_t *get_block,
- int check_fork)
+ int tux3_flags)
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
@@ -170,12 +173,19 @@ retry:
if (!page)
return -ENOMEM;
+ if (tux3_flags & TUX3_F_SEP_DELTA) {
+ /* Separate big write transaction to small chunk. */
+ assert(S_ISREG(mapping->host->i_mode));
+ change_begin_if_needed(tux_sb(mapping->host->i_sb), 1);
+ }
+
/*
- * FIXME: If check_fork == 0, caller handle buffer fork.
- * Unlike check_fork hack, we are better to provide the different
- * blockget() implementation doesn't use tux3_write_begin().
+ * FIXME: If TUX3_F_PAGEFORK is not set, the caller handles the
+ * buffer fork. Rather than this TUX3_F_PAGEFORK hack, it would be
+ * better to provide a different blockget() implementation that
+ * doesn't use tux3_write_begin().
*/
- if (check_fork) {
+ if (tux3_flags & TUX3_F_PAGEFORK) {
struct page *tmp;
tmp = pagefork_for_blockdirty(page, tux3_get_current_delta());
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 44576e193f8656..041529802421e6 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -790,7 +790,7 @@ void change_begin_atomic_nested(struct sb *sb, void **ptr);
void change_end_atomic_nested(struct sb *sb, void *ptr);
void change_begin(struct sb *sb);
int change_end(struct sb *sb);
-void change_begin_if_needed(struct sb *sb);
+void change_begin_if_needed(struct sb *sb, int need_sep);
void change_end_if_needed(struct sb *sb);
/* commit_flusher.c */