diff options
author | Ben Hutchings <ben@decadent.org.uk> | 2020-02-24 02:17:06 +0000 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2020-02-24 02:17:06 +0000 |
commit | 44774ce5f6a99b7b475d7e7167917a0d13de3311 (patch) | |
tree | fa004eec7d331c6698038de718f9de385e2b6884 /queue-3.16 | |
parent | 16195f7eda66ca9e5411af05638fb8cabc2bb3a9 (diff) | |
download | linux-stable-queue-44774ce5f6a99b7b475d7e7167917a0d13de3311.tar.gz |
Add some old ext4 fixes
Diffstat (limited to 'queue-3.16')
7 files changed, 700 insertions, 0 deletions
diff --git a/queue-3.16/ext4-fix-races-between-buffered-io-and-collapse-insert-range.patch b/queue-3.16/ext4-fix-races-between-buffered-io-and-collapse-insert-range.patch new file mode 100644 index 00000000..5e9f8d08 --- /dev/null +++ b/queue-3.16/ext4-fix-races-between-buffered-io-and-collapse-insert-range.patch @@ -0,0 +1,74 @@ +From: Jan Kara <jack@suse.com> +Date: Mon, 7 Dec 2015 14:31:11 -0500 +Subject: ext4: fix races between buffered IO and collapse / insert range + +commit 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 upstream. + +Current code implementing FALLOC_FL_COLLAPSE_RANGE and +FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page +faults. If buffered write or write via mmap manages to squeeze between +filemap_write_and_wait_range() and truncate_pagecache() in the fallocate +implementations, the written data is simply discarded by +truncate_pagecache() although it should have been shifted. + +Fix the problem by moving filemap_write_and_wait_range() call inside +i_mutex and i_mmap_sem. That way we are protected against races with +both buffered writes and page faults. + +Signed-off-by: Jan Kara <jack@suse.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +[bwh: Backported to 3.16: drop changes in ext4_insert_range()] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -5453,21 +5453,7 @@ int ext4_collapse_range(struct inode *in + return ret; + } + +- /* +- * Need to round down offset to be aligned with page size boundary +- * for page size > block size. 
+- */ +- ioffset = round_down(offset, PAGE_SIZE); +- +- /* Write out all dirty pages */ +- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, +- LLONG_MAX); +- if (ret) +- return ret; +- +- /* Take mutex lock */ + mutex_lock(&inode->i_mutex); +- + /* + * There is no need to overlap collapse range with EOF, in which case + * it is effectively a truncate operation +@@ -5492,6 +5478,27 @@ int ext4_collapse_range(struct inode *in + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); ++ /* ++ * Need to round down offset to be aligned with page size boundary ++ * for page size > block size. ++ */ ++ ioffset = round_down(offset, PAGE_SIZE); ++ /* ++ * Write tail of the last page before removed range since it will get ++ * removed from the page cache below. ++ */ ++ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); ++ if (ret) ++ goto out_mmap; ++ /* ++ * Write data that will be shifted to preserve them when discarding ++ * page cache below. We are also protected from pages becoming dirty ++ * by i_mmap_sem. ++ */ ++ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, ++ LLONG_MAX); ++ if (ret) ++ goto out_mmap; + truncate_pagecache(inode, ioffset); + + credits = ext4_writepage_trans_blocks(inode); diff --git a/queue-3.16/ext4-fix-races-between-page-faults-and-hole-punching.patch b/queue-3.16/ext4-fix-races-between-page-faults-and-hole-punching.patch new file mode 100644 index 00000000..a0641b48 --- /dev/null +++ b/queue-3.16/ext4-fix-races-between-page-faults-and-hole-punching.patch @@ -0,0 +1,275 @@ +From: Jan Kara <jack@suse.com> +Date: Mon, 7 Dec 2015 14:28:03 -0500 +Subject: ext4: fix races between page faults and hole punching + +commit ea3d7209ca01da209cda6f0dea8be9cc4b7a933b upstream. + +Currently, page faults and hole punching are completely unsynchronized. 
+This can result in page fault faulting in a page into a range that we +are punching after truncate_pagecache_range() has been called and thus +we can end up with a page mapped to disk blocks that will be shortly +freed. Filesystem corruption will shortly follow. Note that the same +race is avoided for truncate by checking page fault offset against +i_size but there isn't similar mechanism available for punching holes. + +Fix the problem by creating new rw semaphore i_mmap_sem in inode and +grab it for writing over truncate, hole punching, and other functions +removing blocks from extent tree and for read over page faults. We +cannot easily use i_data_sem for this since that ranks below transaction +start and we need something ranking above it so that it can be held over +the whole truncate / hole punching operation. Also remove various +workarounds we had in the code to reduce race window when page fault +could have created pages with stale mapping information. + +Signed-off-by: Jan Kara <jack@suse.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +[bwh: Backported to 3.16: + - Drop changes in ext4_insert_range(), ext4_dax_* + - Adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -937,6 +937,15 @@ struct ext4_inode_info { + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; ++ /* ++ * i_mmap_sem is for serializing page faults with truncate / punch hole ++ * operations. We have to make sure that new page cannot be faulted in ++ * a section of the inode that is being punched. We cannot easily use ++ * i_data_sem for this since we need protection for the whole punch ++ * operation and i_data_sem ranks below transaction start so we have ++ * to occasionally drop it. 
++ */ ++ struct rw_semaphore i_mmap_sem; + struct inode vfs_inode; + struct jbd2_inode *jinode; + +@@ -2205,6 +2214,7 @@ extern int ext4_chunk_trans_blocks(struc + extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, + loff_t lstart, loff_t lend); + extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); ++extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); + extern qsize_t *ext4_get_reserved_space(struct inode *inode); + extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4778,7 +4778,6 @@ static long ext4_zero_range(struct file + int partial_begin, partial_end; + loff_t start, end; + ext4_lblk_t lblk; +- struct address_space *mapping = inode->i_mapping; + unsigned int blkbits = inode->i_blkbits; + + trace_ext4_zero_range(inode, offset, len, mode); +@@ -4794,17 +4793,6 @@ static long ext4_zero_range(struct file + } + + /* +- * Write out all dirty pages to avoid race conditions +- * Then release them. +- */ +- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { +- ret = filemap_write_and_wait_range(mapping, offset, +- offset + len - 1); +- if (ret) +- return ret; +- } +- +- /* + * Round up offset. 
This is not fallocate, we neet to zero out + * blocks, so convert interior block aligned part of the range to + * unwritten and possibly manually zero out unaligned parts of the +@@ -4865,16 +4853,22 @@ static long ext4_zero_range(struct file + flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | + EXT4_EX_NOCACHE); + +- /* Now release the pages and zero block aligned part of pages*/ +- truncate_pagecache_range(inode, start, end - 1); +- inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +- + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ++ /* ++ * Prevent page faults from reinstantiating pages we have ++ * released from page cache. ++ */ ++ down_write(&EXT4_I(inode)->i_mmap_sem); ++ /* Now release the pages and zero block aligned part of pages */ ++ truncate_pagecache_range(inode, start, end - 1); ++ inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ++ + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, + flags, mode); ++ up_write(&EXT4_I(inode)->i_mmap_sem); + if (ret) + goto out_dio; + } +@@ -5490,17 +5484,22 @@ int ext4_collapse_range(struct inode *in + goto out_mutex; + } + +- truncate_pagecache(inode, ioffset); +- + /* Wait for existing dio to complete */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ++ /* ++ * Prevent page faults from reinstantiating pages we have released from ++ * page cache. 
++ */ ++ down_write(&EXT4_I(inode)->i_mmap_sem); ++ truncate_pagecache(inode, ioffset); ++ + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); +- goto out_dio; ++ goto out_mmap; + } + + down_write(&EXT4_I(inode)->i_data_sem); +@@ -5540,7 +5539,8 @@ int ext4_collapse_range(struct inode *in + + out_stop: + ext4_journal_stop(handle); +-out_dio: ++out_mmap: ++ up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); + out_mutex: + mutex_unlock(&inode->i_mutex); +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -199,7 +199,7 @@ errout: + } + + static const struct vm_operations_struct ext4_file_vm_ops = { +- .fault = filemap_fault, ++ .fault = ext4_filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = ext4_page_mkwrite, + }; +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3716,6 +3716,15 @@ int ext4_punch_hole(struct inode *inode, + + } + ++ /* Wait all existing dio workers, newcomers will block on i_mutex */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); ++ ++ /* ++ * Prevent page faults from reinstantiating pages we have released from ++ * page cache. 
++ */ ++ down_write(&EXT4_I(inode)->i_mmap_sem); + first_block_offset = round_up(offset, sb->s_blocksize); + last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; + +@@ -3724,10 +3733,6 @@ int ext4_punch_hole(struct inode *inode, + truncate_pagecache_range(inode, first_block_offset, + last_block_offset); + +- /* Wait all existing dio workers, newcomers will block on i_mutex */ +- ext4_inode_block_unlocked_dio(inode); +- inode_dio_wait(inode); +- + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + credits = ext4_writepage_trans_blocks(inode); + else +@@ -3773,11 +3778,6 @@ int ext4_punch_hole(struct inode *inode, + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + +- /* Now release the pages again to reduce race window */ +- if (last_block_offset > first_block_offset) +- truncate_pagecache_range(inode, first_block_offset, +- last_block_offset); +- + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + if (ret >= 0) +@@ -3785,6 +3785,7 @@ int ext4_punch_hole(struct inode *inode, + out_stop: + ext4_journal_stop(handle); + out_dio: ++ up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); + out_mutex: + mutex_unlock(&inode->i_mutex); +@@ -4879,6 +4880,7 @@ int ext4_setattr(struct dentry *dentry, + } else + ext4_wait_for_tail_page_commit(inode); + } ++ down_write(&EXT4_I(inode)->i_mmap_sem); + /* + * Truncate pagecache after we've waited for commit + * in data=journal mode to make pages freeable. 
+@@ -4886,6 +4888,7 @@ int ext4_setattr(struct dentry *dentry, + truncate_pagecache(inode, inode->i_size); + if (shrink) + ext4_truncate(inode); ++ up_write(&EXT4_I(inode)->i_mmap_sem); + } + + if (!rc) { +@@ -5338,6 +5341,8 @@ int ext4_page_mkwrite(struct vm_area_str + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + ++ down_read(&EXT4_I(inode)->i_mmap_sem); ++ + ret = ext4_convert_inline_data(inode); + if (ret) + goto out_ret; +@@ -5411,6 +5416,19 @@ retry_alloc: + out_ret: + ret = block_page_mkwrite_return(ret); + out: ++ up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(inode->i_sb); + return ret; + } ++ ++int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ struct inode *inode = file_inode(vma->vm_file); ++ int err; ++ ++ down_read(&EXT4_I(inode)->i_mmap_sem); ++ err = filemap_fault(vma, vmf); ++ up_read(&EXT4_I(inode)->i_mmap_sem); ++ ++ return err; ++} +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -967,6 +967,7 @@ static void init_once(void *foo) + INIT_LIST_HEAD(&ei->i_orphan); + init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->i_data_sem); ++ init_rwsem(&ei->i_mmap_sem); + inode_init_once(&ei->vfs_inode); + } + +--- a/fs/ext4/truncate.h ++++ b/fs/ext4/truncate.h +@@ -10,8 +10,10 @@ + */ + static inline void ext4_truncate_failed_write(struct inode *inode) + { ++ down_write(&EXT4_I(inode)->i_mmap_sem); + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); ++ up_write(&EXT4_I(inode)->i_mmap_sem); + } + + /* diff --git a/queue-3.16/ext4-fix-races-of-writeback-with-punch-hole-and-zero-range.patch b/queue-3.16/ext4-fix-races-of-writeback-with-punch-hole-and-zero-range.patch new file mode 100644 index 00000000..30b3c76d --- /dev/null +++ b/queue-3.16/ext4-fix-races-of-writeback-with-punch-hole-and-zero-range.patch @@ -0,0 +1,102 @@ +From: Jan Kara <jack@suse.com> +Date: Mon, 7 Dec 2015 14:34:49 -0500 +Subject: ext4: fix races of writeback with punch hole and zero range + +commit 
011278485ecc3cd2a3954b5d4c73101d919bf1fa upstream. + +When doing delayed allocation, update of on-disk inode size is postponed +until IO submission time. However hole punch or zero range fallocate +calls can end up discarding the tail page cache page and thus on-disk +inode size would never be properly updated. + +Make sure the on-disk inode size is updated before truncating page +cache. + +Signed-off-by: Jan Kara <jack@suse.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/ext4/ext4.h | 3 +++ + fs/ext4/extents.c | 5 +++++ + fs/ext4/inode.c | 35 ++++++++++++++++++++++++++++++++++- + 3 files changed, 42 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -2560,6 +2560,9 @@ static inline int ext4_update_inode_size + return changed; + } + ++int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, ++ loff_t len); ++ + struct ext4_group_info { + unsigned long bb_state; + struct rb_root bb_free_root; +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4856,6 +4856,11 @@ static long ext4_zero_range(struct file + * released from page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); ++ ret = ext4_update_disksize_before_punch(inode, offset, len); ++ if (ret) { ++ up_write(&EXT4_I(inode)->i_mmap_sem); ++ goto out_dio; ++ } + /* Now release the pages and zero block aligned part of pages */ + truncate_pagecache_range(inode, start, end - 1); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3652,6 +3652,35 @@ int ext4_can_truncate(struct inode *inod + } + + /* ++ * We have to make sure i_disksize gets properly updated before we truncate ++ * page cache due to hole punching or zero range. Otherwise i_disksize update ++ * can get lost as it may have been postponed to submission of writeback but ++ * that will never happen after we truncate page cache. 
++ */ ++int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, ++ loff_t len) ++{ ++ handle_t *handle; ++ loff_t size = i_size_read(inode); ++ ++ WARN_ON(!mutex_is_locked(&inode->i_mutex)); ++ if (offset > size || offset + len < size) ++ return 0; ++ ++ if (EXT4_I(inode)->i_disksize >= size) ++ return 0; ++ ++ handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ext4_update_i_disksize(inode, size); ++ ext4_mark_inode_dirty(handle, inode); ++ ext4_journal_stop(handle); ++ ++ return 0; ++} ++ ++/* + * ext4_punch_hole: punches a hole in a file by releaseing the blocks + * associated with the given offset and length + * +@@ -3729,9 +3758,13 @@ int ext4_punch_hole(struct inode *inode, + last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; + + /* Now release the pages and zero block aligned part of pages*/ +- if (last_block_offset > first_block_offset) ++ if (last_block_offset > first_block_offset) { ++ ret = ext4_update_disksize_before_punch(inode, offset, length); ++ if (ret) ++ goto out_dio; + truncate_pagecache_range(inode, first_block_offset, + last_block_offset); ++ } + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + credits = ext4_writepage_trans_blocks(inode); diff --git a/queue-3.16/ext4-move-unlocked-dio-protection-from-ext4_alloc_file_blocks.patch b/queue-3.16/ext4-move-unlocked-dio-protection-from-ext4_alloc_file_blocks.patch new file mode 100644 index 00000000..604591ef --- /dev/null +++ b/queue-3.16/ext4-move-unlocked-dio-protection-from-ext4_alloc_file_blocks.patch @@ -0,0 +1,89 @@ +From: Jan Kara <jack@suse.com> +Date: Mon, 7 Dec 2015 14:29:17 -0500 +Subject: ext4: move unlocked dio protection from ext4_alloc_file_blocks() + +commit 17048e8a083fec7ad841d88ef0812707fbc7e39f upstream. + +Currently ext4_alloc_file_blocks() was handling protection against +unlocked DIO. 
However we now need to sometimes call it under i_mmap_sem +and sometimes not and DIO protection ranks above it (although strictly +speaking this cannot currently create any deadlocks). Also +ext4_zero_range() was actually getting & releasing unlocked DIO +protection twice in some cases. Luckily it didn't introduce any real bug +but it was a land mine waiting to be stepped on. So move DIO protection +out from ext4_alloc_file_blocks() into the two callsites. + +Signed-off-by: Jan Kara <jack@suse.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/ext4/extents.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4707,10 +4707,6 @@ static int ext4_alloc_file_blocks(struct + if (len <= EXT_UNWRITTEN_MAX_LEN) + flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; + +- /* Wait all existing dio workers, newcomers will block on i_mutex */ +- ext4_inode_block_unlocked_dio(inode); +- inode_dio_wait(inode); +- + /* + * credits to insert 1 extent into extent tree + */ +@@ -4760,8 +4756,6 @@ retry: + goto retry; + } + +- ext4_inode_resume_unlocked_dio(inode); +- + return ret > 0 ? 
ret2 : ret; + } + +@@ -4836,6 +4830,10 @@ static long ext4_zero_range(struct file + if (mode & FALLOC_FL_KEEP_SIZE) + flags |= EXT4_GET_BLOCKS_KEEP_SIZE; + ++ /* Wait all existing dio workers, newcomers will block on i_mutex */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); ++ + /* Preallocate the range including the unaligned edges */ + if (partial_begin || partial_end) { + ret = ext4_alloc_file_blocks(file, +@@ -4844,7 +4842,7 @@ static long ext4_zero_range(struct file + round_down(offset, 1 << blkbits)) >> blkbits, + new_size, flags, mode); + if (ret) +- goto out_mutex; ++ goto out_dio; + + } + +@@ -4853,10 +4851,6 @@ static long ext4_zero_range(struct file + flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | + EXT4_EX_NOCACHE); + +- /* Wait all existing dio workers, newcomers will block on i_mutex */ +- ext4_inode_block_unlocked_dio(inode); +- inode_dio_wait(inode); +- + /* + * Prevent page faults from reinstantiating pages we have + * released from page cache. +@@ -4985,8 +4979,13 @@ long ext4_fallocate(struct file *file, i + goto out; + } + ++ /* Wait all existing dio workers, newcomers will block on i_mutex */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); ++ + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, + flags, mode); ++ ext4_inode_resume_unlocked_dio(inode); + if (ret) + goto out; + diff --git a/queue-3.16/ext4-only-call-ext4_truncate-when-size-isize.patch b/queue-3.16/ext4-only-call-ext4_truncate-when-size-isize.patch new file mode 100644 index 00000000..e32b24cf --- /dev/null +++ b/queue-3.16/ext4-only-call-ext4_truncate-when-size-isize.patch @@ -0,0 +1,109 @@ +From: Josef Bacik <jbacik@fb.com> +Date: Mon, 22 Jun 2015 00:31:26 -0400 +Subject: ext4: only call ext4_truncate when size <= isize + +commit 3da40c7b089810ac9cf2bb1e59633f619f3a7312 upstream. 
+ +At LSF we decided that if we truncate up from isize we shouldn't trim +fallocated blocks that were fallocated with KEEP_SIZE and are past the +new i_size. This patch fixes ext4 to do this. + +[ Completely reworked patch so that i_disksize would actually get set + when truncating up. Also reworked the code for handling truncate so + that it's easier to handle. -- tytso ] + +Signed-off-by: Josef Bacik <jbacik@fb.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Reviewed-by: Lukas Czerner <lczerner@redhat.com> +[bwh: Backported to 3.16 as dependency of commit ea3d7209ca01 + "ext4: fix races between page faults and hole punching": + - Adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/ext4/inode.c | 38 ++++++++++++++++++-------------------- + 1 file changed, 18 insertions(+), 20 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4809,8 +4809,10 @@ int ext4_setattr(struct dentry *dentry, + ext4_journal_stop(handle); + } + +- if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { ++ if (attr->ia_valid & ATTR_SIZE) { + handle_t *handle; ++ loff_t oldsize = inode->i_size; ++ int shrink = (attr->ia_size <= inode->i_size); + + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); +@@ -4818,24 +4820,26 @@ int ext4_setattr(struct dentry *dentry, + if (attr->ia_size > sbi->s_bitmap_maxbytes) + return -EFBIG; + } ++ if (!S_ISREG(inode->i_mode)) ++ return -EINVAL; + + if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + +- if (S_ISREG(inode->i_mode) && ++ if (ext4_should_order_data(inode) && + (attr->ia_size < inode->i_size)) { +- if (ext4_should_order_data(inode)) { +- error = ext4_begin_ordered_truncate(inode, ++ error = ext4_begin_ordered_truncate(inode, + attr->ia_size); +- if (error) +- goto err_out; +- } ++ if (error) ++ goto err_out; ++ } ++ if (attr->ia_size != inode->i_size) { + handle = ext4_journal_start(inode, 
EXT4_HT_INODE, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } +- if (ext4_handle_valid(handle)) { ++ if (ext4_handle_valid(handle) && shrink) { + error = ext4_orphan_add(handle, inode); + orphan = 1; + } +@@ -4854,15 +4858,13 @@ int ext4_setattr(struct dentry *dentry, + up_write(&EXT4_I(inode)->i_data_sem); + ext4_journal_stop(handle); + if (error) { +- ext4_orphan_del(NULL, inode); ++ if (orphan) ++ ext4_orphan_del(NULL, inode); + goto err_out; + } +- } else { +- loff_t oldsize = inode->i_size; +- +- i_size_write(inode, attr->ia_size); +- pagecache_isize_extended(inode, oldsize, inode->i_size); + } ++ if (!shrink) ++ pagecache_isize_extended(inode, oldsize, inode->i_size); + + /* + * Blocks are going to be removed from the inode. Wait +@@ -4882,13 +4884,9 @@ int ext4_setattr(struct dentry *dentry, + * in data=journal mode to make pages freeable. + */ + truncate_pagecache(inode, inode->i_size); ++ if (shrink) ++ ext4_truncate(inode); + } +- /* +- * We want to call ext4_truncate() even if attr->ia_size == +- * inode->i_size for cases like truncation of fallocated space +- */ +- if (attr->ia_valid & ATTR_SIZE) +- ext4_truncate(inode); + + if (!rc) { + setattr_copy(inode, attr); diff --git a/queue-3.16/ext4-wait-for-existing-dio-workers-in-ext4_alloc_file_blocks.patch b/queue-3.16/ext4-wait-for-existing-dio-workers-in-ext4_alloc_file_blocks.patch new file mode 100644 index 00000000..808625e0 --- /dev/null +++ b/queue-3.16/ext4-wait-for-existing-dio-workers-in-ext4_alloc_file_blocks.patch @@ -0,0 +1,45 @@ +From: Lukas Czerner <lczerner@redhat.com> +Date: Mon, 15 Jun 2015 00:23:53 -0400 +Subject: ext4: wait for existing dio workers in ext4_alloc_file_blocks() + +commit 0d306dcf86e8f065dff42a4a934ae9d99af35ba5 upstream. + +Currently existing dio workers can jump in and potentially increase +extent tree depth while we're allocating blocks in +ext4_alloc_file_blocks(). 
This may cause us to underestimate the +number of credits needed for the transaction because the extent tree +depth can change after our estimation. + +Fix this by waiting for all the existing dio workers in the same way +as we do it in ext4_punch_hole. We've seen errors caused by this in +xfstest generic/299, however it's really hard to reproduce. + +Signed-off-by: Lukas Czerner <lczerner@redhat.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/ext4/extents.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4707,6 +4707,10 @@ static int ext4_alloc_file_blocks(struct + if (len <= EXT_UNWRITTEN_MAX_LEN) + flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; + ++ /* Wait all existing dio workers, newcomers will block on i_mutex */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); ++ + /* + * credits to insert 1 extent into extent tree + */ +@@ -4756,6 +4760,8 @@ retry: + goto retry; + } + ++ ext4_inode_resume_unlocked_dio(inode); ++ + return ret > 0 ? ret2 : ret; + } + diff --git a/queue-3.16/series b/queue-3.16/series index 79e34cda..18e66d16 100644 --- a/queue-3.16/series +++ b/queue-3.16/series @@ -5,3 +5,9 @@ mwifiex-fix-heap-overflow-in-mmwifiex_process_tdls_action_frame.patch cfg80211-mac80211-make-ieee80211_send_layer2_update-a-public.patch mac80211-do-not-send-layer-2-update-frame-before-authorization.patch x86-microcode-amd-add-support-for-fam17h-microcode-loading.patch +ext4-wait-for-existing-dio-workers-in-ext4_alloc_file_blocks.patch +ext4-only-call-ext4_truncate-when-size-isize.patch +ext4-fix-races-between-page-faults-and-hole-punching.patch +ext4-move-unlocked-dio-protection-from-ext4_alloc_file_blocks.patch +ext4-fix-races-between-buffered-io-and-collapse-insert-range.patch +ext4-fix-races-of-writeback-with-punch-hole-and-zero-range.patch |