diff options
author | OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> | 2014-05-01 17:22:32 +0900 |
---|---|---|
committer | Daniel Phillips <daniel@tux3.org> | 2014-05-01 17:22:32 +0900 |
commit | 66e06b3693f5a7c2449ed0f2996a96ea3a30c1e5 (patch) | |
tree | 48326e39e2051ef461f1745dee038d5aa3a8a946 | |
parent | c6d82acc178b551dc7af3ed7a7a7a4ec9173b489 (diff) | |
download | linux-tux3-66e06b3693f5a7c2449ed0f2996a96ea3a30c1e5.tar.gz |
tux3: Support mmap write: Fix race of mmap write with write(2) for copy_from_user()
mmap write and write(2) can race.
cpu0                                cpu1
write(2)
    delta-1 = get_delta()
    remove_suid()
    update_timestamp()
    write_begin()
                                    delta++
                                    mmap write
                                        lock_page()
                                        delta-2 = get_delta()
                                        page-B = pagefork(page-A)
                                        unlock_page()
        get_delta_if_need()
        lock_page()
        page-B = find_get_page();
        pagefork(page-B)
    write_end()
    put_delta()
If mmap write and write(2) run in the above order, cpu0 sees dirty page-B
for delta-2 while write(2) is still using delta-1. We should guarantee
that a writer never sees data from a future delta.
To fix this race, this patch (re)takes the delta while holding lock_page().
With this change, we are guaranteed to use the latest delta, which fixes the race.
FIXME: However, this approach separates the transaction for
update_timestamp()/remove_suid()/etc. from the first modification to the
page. It would be better to modify data and metadata in the same delta
(to make the app's modification and the on-disk state more atomic).
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r-- | fs/tux3/commit.c | 6 | ||||
-rw-r--r-- | fs/tux3/filemap.c | 13 | ||||
-rw-r--r-- | fs/tux3/filemap_blocklib.c | 20 | ||||
-rw-r--r-- | fs/tux3/tux3.h | 2 |
4 files changed, 26 insertions, 15 deletions
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c index fc7f85e6f912ce..a50919f8c955b6 100644 --- a/fs/tux3/commit.c +++ b/fs/tux3/commit.c @@ -768,10 +768,14 @@ int change_end(struct sb *sb) * } * change_end_if_need() */ -void change_begin_if_needed(struct sb *sb) +void change_begin_if_needed(struct sb *sb, int need_sep) { if (current->journal_info == NULL) change_begin(sb); + else if (need_sep) { + change_end(sb); + change_begin(sb); + } } void change_end_if_needed(struct sb *sb) diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c index 612ac41e2490bc..c71f1c5f93f2e3 100644 --- a/fs/tux3/filemap.c +++ b/fs/tux3/filemap.c @@ -824,12 +824,12 @@ static int __tux3_file_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, - int check_fork) + int tux3_flags) { int ret; ret = tux3_write_begin(mapping, pos, len, flags, pagep, - tux3_da_get_block, check_fork); + tux3_da_get_block, tux3_flags); if (ret < 0) tux3_write_failed(mapping, pos + len); return ret; @@ -854,12 +854,9 @@ static int tux3_file_write_begin(struct file *file, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - /* Separate big write transaction to small chunk. 
*/ - assert(S_ISREG(mapping->host->i_mode)); - change_begin_if_needed(tux_sb(mapping->host->i_sb)); - return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, 1); + fsdata, + TUX3_F_PAGEFORK | TUX3_F_SEP_DELTA); } static int tux3_file_write_end(struct file *file, struct address_space *mapping, @@ -989,7 +986,7 @@ static int tux3_symlink_write_begin(struct file *file, struct page **pagep, void **fsdata) { return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, 1); + fsdata, TUX3_F_PAGEFORK); } /* Copy of tux_file_aops, except ->write_begin/end */ diff --git a/fs/tux3/filemap_blocklib.c b/fs/tux3/filemap_blocklib.c index f7d78642632748..8b77c9f64ad26d 100644 --- a/fs/tux3/filemap_blocklib.c +++ b/fs/tux3/filemap_blocklib.c @@ -152,6 +152,9 @@ static int __tux3_write_begin(struct page *page, loff_t pos, unsigned len, return err; } +#define TUX3_F_PAGEFORK (1 << 0) +#define TUX3_F_SEP_DELTA (1 << 1) + /* * Copy of block_write_begin() * (Add to call pagefork_for_blockdirty() for buffer fork) @@ -159,7 +162,7 @@ static int __tux3_write_begin(struct page *page, loff_t pos, unsigned len, static int tux3_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block, - int check_fork) + int tux3_flags) { pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; @@ -170,12 +173,19 @@ retry: if (!page) return -ENOMEM; + if (tux3_flags & TUX3_F_SEP_DELTA) { + /* Separate big write transaction to small chunk. */ + assert(S_ISREG(mapping->host->i_mode)); + change_begin_if_needed(tux_sb(mapping->host->i_sb), 1); + } + /* - * FIXME: If check_fork == 0, caller handle buffer fork. - * Unlike check_fork hack, we are better to provide the different - * blockget() implementation doesn't use tux3_write_begin(). + * FIXME: If TUX3_WRITE_PAGEFORK, caller handle buffer fork. 
+ * Unlike TUX3_WRITE_PAGEFORK hack, we are better to provide + * the different blockget() implementation doesn't use + * tux3_write_begin(). */ - if (check_fork) { + if (tux3_flags & TUX3_F_PAGEFORK) { struct page *tmp; tmp = pagefork_for_blockdirty(page, tux3_get_current_delta()); diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h index 44576e193f8656..041529802421e6 100644 --- a/fs/tux3/tux3.h +++ b/fs/tux3/tux3.h @@ -790,7 +790,7 @@ void change_begin_atomic_nested(struct sb *sb, void **ptr); void change_end_atomic_nested(struct sb *sb, void *ptr); void change_begin(struct sb *sb); int change_end(struct sb *sb); -void change_begin_if_needed(struct sb *sb); +void change_begin_if_needed(struct sb *sb, int need_sep); void change_end_if_needed(struct sb *sb); /* commit_flusher.c */ |