Merge tag 'for-6.4-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba: - handle memory allocation error in checksumming helper (reported by syzbot) - fix lockdep splat when aborting a transaction, add NOFS protection around invalidate_inode_pages2 that could allocate with GFP_KERNEL - reduce chances to hit an ENOSPC during scrub with RAID56 profiles * tag 'for-6.4-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: use nofs when cleaning up aborted transactions btrfs: handle memory allocation failure in btrfs_csum_one_bio btrfs: scrub: try harder to mark RAID56 block groups read-only
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-05-26 13:21:38 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-05-26 13:21:38 -0700
commit: b158dd941b4f28e12c4f956caf2352febe09fe4e (patch)
tree: 967259cfbbdab9983538c34a0983372e895c0cb4
parent: b83ac44e02986e640ee954e187ba414cb94453e2 (diff)
parent: 597441b3436a43011f31ce71dc0a6c0bf5ce958a (diff)
download: linux-b158dd941b4f28e12c4f956caf2352febe09fe4e.tar.gz
4 files changed, 32 insertions, 4 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 957ad1c31c4f86..590b0356026505 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2818,10 +2818,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 	}
 
 	ret = inc_block_group_ro(cache, 0);
-	if (!do_chunk_alloc || ret == -ETXTBSY)
-		goto unlock_out;
 	if (!ret)
 		goto out;
+	if (ret == -ETXTBSY)
+		goto unlock_out;
+
+	/*
+	 * Skip chunk alloction if the bg is SYSTEM, this is to avoid system
+	 * chunk allocation storm to exhaust the system chunk array.  Otherwise
+	 * we still want to try our best to mark the block group read-only.
+	 */
+	if (!do_chunk_alloc && ret == -ENOSPC &&
+	    (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+		goto unlock_out;
+
 	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
 	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 	if (ret < 0)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fbf9006c62346c..6de6dcf2743e26 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4936,7 +4936,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 		 */
 		inode = igrab(&btrfs_inode->vfs_inode);
 		if (inode) {
+			unsigned int nofs_flag;
+
+			nofs_flag = memalloc_nofs_save();
 			invalidate_inode_pages2(inode->i_mapping);
+			memalloc_nofs_restore(nofs_flag);
 			iput(inode);
 		}
 		spin_lock(&root->delalloc_lock);
@@ -5042,7 +5046,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
 
 	inode = cache->io_ctl.inode;
 	if (inode) {
+		unsigned int nofs_flag;
+
+		nofs_flag = memalloc_nofs_save();
 		invalidate_inode_pages2(inode->i_mapping);
+		memalloc_nofs_restore(nofs_flag);
+
 		BTRFS_I(inode)->generation = 0;
 		cache->io_ctl.inode = NULL;
 		iput(inode);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index cd4cce9ba443b9..d1cd0a692f8efb 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -792,7 +792,9 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
 				sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
 						      bytes_left), GFP_KERNEL);
 				memalloc_nofs_restore(nofs_flag);
-				BUG_ON(!sums); /* -ENOMEM */
+				if (!sums)
+					return BLK_STS_RESOURCE;
+
 				sums->len = bytes_left;
 				ordered = btrfs_lookup_ordered_extent(inode,
 								offset);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661c1..dd37cba5802224 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2518,13 +2518,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
 		if (ret == 0) {
 			ro_set = 1;
-		} else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+		} else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+			   !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
 			/*
 			 * btrfs_inc_block_group_ro return -ENOSPC when it
 			 * failed in creating new chunk for metadata.
 			 * It is not a problem for scrub, because
 			 * metadata are always cowed, and our scrub paused
 			 * commit_transactions.
+			 *
+			 * For RAID56 chunks, we have to mark them read-only
+			 * for scrub, as later we would use our own cache
+			 * out of RAID56 realm.
+			 * Thus we want the RAID56 bg to be marked RO to
+			 * prevent RMW from screwing up out cache.
 			 */
 			ro_set = 0;
 		} else if (ret == -ETXTBSY) {
author	Linus Torvalds <torvalds@linux-foundation.org>	2023-05-26 13:21:38 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-05-26 13:21:38 -0700
commit	b158dd941b4f28e12c4f956caf2352febe09fe4e (patch)
tree	967259cfbbdab9983538c34a0983372e895c0cb4
parent	b83ac44e02986e640ee954e187ba414cb94453e2 (diff)
parent	597441b3436a43011f31ce71dc0a6c0bf5ce958a (diff)
download	linux-b158dd941b4f28e12c4f956caf2352febe09fe4e.tar.gz