From 215b2bf72a05d702287fc45d854b4f5019f9aad1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Mar 2024 15:13:52 -0800 Subject: xfs: fix dev_t usage in xmbuf tracepoints Fix some inconsistencies in the xmbuf tracepoints -- they should be reporting the major/minor of the filesystem that they're associated with, so that we have some clue on whose behalf the xmbuf was created. Fix the xmbuf_free tracepoint to report the same. Don't call the trace function until the xmbuf is fully initialized. Fixes: 5076a6040ca1 ("xfs: support in-memory buffer cache target") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_buf_mem.c | 4 ++-- fs/xfs/xfs_trace.h | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 8ad38c64708ec5..9bb2d24de70941 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -81,8 +81,6 @@ xmbuf_alloc( /* ensure all writes are below EOF to avoid pagecache zeroing */ i_size_write(inode, inode->i_sb->s_maxbytes); - trace_xmbuf_create(btp); - error = xfs_buf_cache_init(btp->bt_cache); if (error) goto out_file; @@ -99,6 +97,8 @@ xmbuf_alloc( if (error) goto out_bcache; + trace_xmbuf_create(btp); + *btpp = btp; return 0; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 56b07d8ed431f2..aea97fc074f8de 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4626,6 +4626,7 @@ TRACE_EVENT(xmbuf_create, char *path; struct file *file = btp->bt_file; + __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; memset(pathname, 0, sizeof(pathname)); path = file_path(file, pathname, sizeof(pathname) - 1); @@ -4633,7 +4634,8 @@ TRACE_EVENT(xmbuf_create, path = "(unknown)"; strncpy(__entry->pathname, path, sizeof(__entry->pathname)); ), - TP_printk("xmino 0x%lx path '%s'", + TP_printk("dev %d:%d xmino 0x%lx path '%s'", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pathname) ); @@ -4642,6 +4644,7 @@ TRACE_EVENT(xmbuf_free, TP_PROTO(struct xfs_buftarg *btp), TP_ARGS(btp), TP_STRUCT__entry( + __field(dev_t, dev) __field(unsigned long, ino) __field(unsigned long long, bytes) __field(loff_t, size) @@ -4650,11 +4653,13 @@ TRACE_EVENT(xmbuf_free, struct file *file = btp->bt_file; struct inode *inode = file_inode(file); + __entry->dev = btp->bt_mount->m_super->s_dev; __entry->size = i_size_read(inode); __entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes; __entry->ino = inode->i_ino; ), - TP_printk("xmino 0x%lx mem_bytes 0x%llx isize 0x%llx", + TP_printk("dev %d:%d xmino 0x%lx mem_bytes 0x%llx isize 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->bytes, __entry->size) -- cgit 1.2.3-korg From 0c6ca06aad84bac097f5c005d911db92dba3ae94 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 15 Mar 2024 12:16:39 +1100 Subject: xfs: quota radix tree allocations need to be NOFS on insert In converting the XFS code from GFP_NOFS to scoped contexts, we converted the quota radix tree to GFP_KERNEL. Unfortunately, it was not clearly documented that this set was because there is a dependency on the quotainfo->qi_tree_lock being taken in memory reclaim to remove dquots from the radix tree. In hindsight this is obvious, but the radix tree allocations on insert are not immediately obvious, and we avoid this for the inode cache radix trees by using preloading and hence completely avoiding the radix tree node allocation under tree lock constraints. Hence there are a few solutions here. The first is to reinstate GFP_NOFS for the radix tree and add a comment explaining why GFP_NOFS is used. The second is to use memalloc_nofs_save() on the radix tree insert context, which makes it obvious that the radix tree insert runs under GFP_NOFS constraints. The third option is to simply replace the radix tree and it's lock with an xarray which can do memory allocation safely in an insert context. The first is OK, but not really the direction we want to head. The second is my preferred short term solution. The third - converting XFS radix trees to xarray - is the longer term solution. Hence to fix the regression here, we take option 2 as it moves us in the direction we want to head with memory allocation and GFP_NOFS removal. Reported-by: syzbot+8fdff861a781522bda4d@syzkaller.appspotmail.com Reported-by: syzbot+d247769793ec169e4bf9@syzkaller.appspotmail.com Fixes: 94a69db2367e ("xfs: use __GFP_NOLOCKDEP instead of GFP_NOFS") Signed-off-by: Dave Chinner Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_dquot.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 30d36596a2e46a..c98cb468c35780 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -811,6 +811,12 @@ restart: * caller should throw away the dquot and start over. Otherwise, the dquot * is returned locked (and held by the cache) as if there had been a cache * hit. + * + * The insert needs to be done under memalloc_nofs context because the radix + * tree can do memory allocation during insert. The qi->qi_tree_lock is taken in + * memory reclaim when freeing unused dquots, so we cannot have the radix tree + * node allocation recursing into filesystem reclaim whilst we hold the + * qi_tree_lock. */ static int xfs_qm_dqget_cache_insert( @@ -820,25 +826,27 @@ xfs_qm_dqget_cache_insert( xfs_dqid_t id, struct xfs_dquot *dqp) { + unsigned int nofs_flags; int error; + nofs_flags = memalloc_nofs_save(); mutex_lock(&qi->qi_tree_lock); error = radix_tree_insert(tree, id, dqp); if (unlikely(error)) { /* Duplicate found! Caller must try again. */ - mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_dup(dqp); - return error; + goto out_unlock; } /* Return a locked dquot to the caller, with a reference taken. */ xfs_dqlock(dqp); dqp->q_nrefs = 1; - qi->qi_dquots++; - mutex_unlock(&qi->qi_tree_lock); - return 0; +out_unlock: + mutex_unlock(&qi->qi_tree_lock); + memalloc_nofs_restore(nofs_flags); + return error; } /* Check our input parameters. */ -- cgit 1.2.3-korg