aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2023-09-06 11:21:47 +0200
committerCarlos Maiolino <cem@kernel.org>2023-09-07 11:55:50 +0200
commitef16737e44b9274fddcc736ec2da5c07d5c4f703 (patch)
treec8bd0e35b4c7d4c2609e18d2aeae6d4e513115ec
parentd03a3c4b235c6ecdda9b27d5f31115ad710b5912 (diff)
downloadxfsprogs-dev-ef16737e44b9274fddcc736ec2da5c07d5c4f703.tar.gz
xfs: use deferred frees for btree block freeing
Source kernel commit: b742d7b4f0e03df25c2a772adcded35044b625ca Btrees that aren't freespace management trees use the normal extent allocation and freeing routines for their blocks. Hence when a btree block is freed, a direct call to xfs_free_extent() is made and the extent is immediately freed. This puts the entire free space management btrees under this path, so we are stacking btrees on btrees in the call stack. The inobt, finobt and refcount btrees all do this. However, the bmap btree does not do this - it calls xfs_free_extent_later() to defer the extent free operation via an XEFI and hence it gets processed in deferred operation processing during the commit of the primary transaction (i.e. via intent chaining). We need to change xfs_free_extent() to behave in a non-blocking manner so that we can avoid deadlocks with busy extents near ENOSPC in transactions that free multiple extents. Inserting or removing a record from a btree can cause a multi-level tree merge operation and that will free multiple blocks from the btree in a single transaction. i.e. we can call xfs_free_extent() multiple times, and hence the btree manipulation transaction is vulnerable to this busy extent deadlock vector. To fix this, convert all the remaining callers of xfs_free_extent() to use xfs_free_extent_later() to queue XEFIs and hence defer processing of the extent frees to a context that can be safely restarted if a deadlock condition is detected. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Chandan Babu R <chandan.babu@oracle.com> Signed-off-by: Carlos Maiolino <cem@kernel.org>
-rw-r--r--libxfs/xfs_ag.c2
-rw-r--r--libxfs/xfs_alloc.c4
-rw-r--r--libxfs/xfs_alloc.h8
-rw-r--r--libxfs/xfs_bmap.c8
-rw-r--r--libxfs/xfs_bmap_btree.c3
-rw-r--r--libxfs/xfs_ialloc.c8
-rw-r--r--libxfs/xfs_ialloc_btree.c3
-rw-r--r--libxfs/xfs_refcount.c9
-rw-r--r--libxfs/xfs_refcount_btree.c8
9 files changed, 29 insertions, 24 deletions
diff --git a/libxfs/xfs_ag.c b/libxfs/xfs_ag.c
index c82afbb82a..75711cbaef 100644
--- a/libxfs/xfs_ag.c
+++ b/libxfs/xfs_ag.c
@@ -983,7 +983,7 @@ xfs_ag_shrink_space(
goto resv_err;
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
- true);
+ XFS_AG_RESV_NONE, true);
if (err2)
goto resv_err;
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index 107a849207..e5fd360cff 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -2445,6 +2445,7 @@ xfs_defer_agfl_block(
xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
+ xefi->xefi_agresv = XFS_AG_RESV_AGFL;
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
return -EFSCORRUPTED;
@@ -2466,6 +2467,7 @@ __xfs_free_extent_later(
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type,
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
@@ -2486,6 +2488,7 @@ __xfs_free_extent_later(
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
return -EFSCORRUPTED;
@@ -2494,6 +2497,7 @@ __xfs_free_extent_later(
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
xefi->xefi_blockcount = (xfs_extlen_t)len;
+ xefi->xefi_agresv = type;
if (skip_discard)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
if (oinfo) {
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index 85ac470be0..a3e519577e 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -232,7 +232,7 @@ xfs_buf_to_agfl_bno(
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
- bool skip_discard);
+ enum xfs_ag_resv_type type, bool skip_discard);
/*
* List of extents to be free "later".
@@ -245,6 +245,7 @@ struct xfs_extent_free_item {
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
struct xfs_perag *xefi_pag;
unsigned int xefi_flags;
+ enum xfs_ag_resv_type xefi_agresv;
};
void xfs_extent_free_get_group(struct xfs_mount *mp,
@@ -259,9 +260,10 @@ xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
- const struct xfs_owner_info *oinfo)
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- return __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
}
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 5deeb47271..2bd23d40e7 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -567,7 +567,8 @@ xfs_bmap_btree_to_extents(
return error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
- error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
if (error)
return error;
@@ -5229,8 +5230,9 @@ xfs_bmap_del_extent_real(
} else {
error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
- (bflags & XFS_BMAPI_NODISCARD) ||
- del->br_state == XFS_EXT_UNWRITTEN);
+ XFS_AG_RESV_NONE,
+ ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN));
if (error)
goto done;
}
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index 751e8165b0..925cc153b2 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -269,7 +269,8 @@ xfs_bmbt_free_block(
int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
- error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
if (error)
return error;
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index 42d8863e08..1cb680d4f4 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -1848,8 +1848,8 @@ xfs_difree_inode_chunk(
/* not sparse, calculate extent info directly */
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
+ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1894,8 +1894,8 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
if (error)
return error;
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 560393b05d..0c5d7ba1ce 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -159,8 +159,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
- return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
}
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
index 0006ad7c93..8f30dc196e 100644
--- a/libxfs/xfs_refcount.c
+++ b/libxfs/xfs_refcount.c
@@ -1151,7 +1151,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
- tmp.rc_blockcount, NULL);
+ tmp.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@@ -1212,7 +1213,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
- ext.rc_blockcount, NULL);
+ ext.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@@ -1980,7 +1982,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
- rr->rr_rrec.rc_blockcount, NULL);
+ rr->rr_rrec.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_trans;
diff --git a/libxfs/xfs_refcount_btree.c b/libxfs/xfs_refcount_btree.c
index 1205e29191..67cb59e335 100644
--- a/libxfs/xfs_refcount_btree.c
+++ b/libxfs/xfs_refcount_btree.c
@@ -105,19 +105,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
- int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
- if (error)
- return error;
-
- return error;
}
STATIC int