about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-12-12 11:06:41 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-12-12 11:06:41 -0800
commit: eaadbbaaff74ac9a7f84f412fbaac221a04896c1 (patch)
tree: 985f25e3d532535549890c1c669ccaed415141c7
parent: 8b8cd4beea4f6c68092736c544a797dcd5e094c5 (diff)
parent: 3f29f1c336c0e8a4bec52f1e5217f88835553e5b (diff)
download: linux-eaadbbaaff74ac9a7f84f412fbaac221a04896c1.tar.gz
Merge tag 'fuse-fixes-6.7-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse fixes from Miklos Szeredi:

 - Fix a couple of potential crashes, one introduced in 6.6 and one
   in 5.10

 - Fix misbehavior of virtiofs submounts on memory pressure

 - Clarify naming in the uAPI for a recent feature

* tag 'fuse-fixes-6.7-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: disable FOPEN_PARALLEL_DIRECT_WRITES with FUSE_DIRECT_IO_ALLOW_MMAP
  fuse: dax: set fc->dax to NULL in fuse_dax_conn_free()
  fuse: share lookup state between submount and its parent
  docs/fuse-io: Document the usage of DIRECT_IO_ALLOW_MMAP
  fuse: Rename DIRECT_IO_RELAX to DIRECT_IO_ALLOW_MMAP
-rw-r--r-- Documentation/filesystems/fuse-io.rst | 3
-rw-r--r-- fs/fuse/dax.c | 1
-rw-r--r-- fs/fuse/file.c | 8
-rw-r--r-- fs/fuse/fuse_i.h | 19
-rw-r--r-- fs/fuse/inode.c | 81
-rw-r--r-- include/uapi/linux/fuse.h | 10
6 files changed, 106 insertions, 16 deletions
diff --git a/Documentation/filesystems/fuse-io.rst b/Documentation/filesystems/fuse-io.rst
index 255a368fe534b4..6464de4266ad50 100644
--- a/Documentation/filesystems/fuse-io.rst
+++ b/Documentation/filesystems/fuse-io.rst
@@ -15,7 +15,8 @@ The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the
FUSE_OPEN reply.
In direct-io mode the page cache is completely bypassed for reads and writes.
-No read-ahead takes place. Shared mmap is disabled.
+No read-ahead takes place. Shared mmap is disabled by default. To allow shared
+mmap, the FUSE_DIRECT_IO_ALLOW_MMAP flag may be enabled in the FUSE_INIT reply.
In cached mode reads may be satisfied from the page cache, and data may be
read-ahead by the kernel to fill the cache. The cache is always kept consistent
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 23904a6a9a96f7..12ef91d170bb30 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1222,6 +1222,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc)
if (fc->dax) {
fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
kfree(fc->dax);
+ fc->dax = NULL;
}
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1cdb6327511ef8..a660f1f21540ab 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1448,7 +1448,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!ia)
return -ENOMEM;
- if (fopen_direct_io && fc->direct_io_relax) {
+ if (fopen_direct_io && fc->direct_io_allow_mmap) {
res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
if (res) {
fuse_io_free(ia);
@@ -1574,6 +1574,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t res;
bool exclusive_lock =
!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
+ get_fuse_conn(inode)->direct_io_allow_mmap ||
iocb->ki_flags & IOCB_APPEND ||
fuse_direct_write_extending_i_size(iocb, from);
@@ -1581,6 +1582,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
* Take exclusive lock if
* - Parallel direct writes are disabled - a user space decision
* - Parallel direct writes are enabled and i_size is being extended.
+ * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP).
* This might not be needed at all, but needs further investigation.
*/
if (exclusive_lock)
@@ -2466,9 +2468,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
if (ff->open_flags & FOPEN_DIRECT_IO) {
/* Can't provide the coherency needed for MAP_SHARED
- * if FUSE_DIRECT_IO_RELAX isn't set.
+ * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set.
*/
- if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax)
+ if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap)
return -ENODEV;
invalidate_inode_pages2(file->f_mapping);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6e6e721f421b9d..1df83eebda9277 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -63,6 +63,19 @@ struct fuse_forget_link {
struct fuse_forget_link *next;
};
+/* Submount lookup tracking */
+struct fuse_submount_lookup {
+ /** Refcount */
+ refcount_t count;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_forget_link *forget;
+};
+
/** FUSE inode */
struct fuse_inode {
/** Inode data */
@@ -158,6 +171,8 @@ struct fuse_inode {
*/
struct fuse_inode_dax *dax;
#endif
+ /** Submount specific lookup tracking */
+ struct fuse_submount_lookup *submount_lookup;
};
/** FUSE inode state bits */
@@ -797,8 +812,8 @@ struct fuse_conn {
/* Is tmpfile not implemented by fs? */
unsigned int no_tmpfile:1;
- /* relax restrictions in FOPEN_DIRECT_IO mode */
- unsigned int direct_io_relax:1;
+ /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */
+ unsigned int direct_io_allow_mmap:1;
/* Is statx not implemented by fs? */
unsigned int no_statx:1;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 74d4f09d5827e8..2a6d44f91729bb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void)
return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}
+static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
+{
+ struct fuse_submount_lookup *sl;
+
+ sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
+ if (!sl)
+ return NULL;
+ sl->forget = fuse_alloc_forget();
+ if (!sl->forget)
+ goto out_free;
+
+ return sl;
+
+out_free:
+ kfree(sl);
+ return NULL;
+}
+
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct fuse_inode *fi;
@@ -83,6 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->attr_version = 0;
fi->orig_ino = 0;
fi->state = 0;
+ fi->submount_lookup = NULL;
mutex_init(&fi->mutex);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
@@ -113,6 +132,17 @@ static void fuse_free_inode(struct inode *inode)
kmem_cache_free(fuse_inode_cachep, fi);
}
+static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
+ struct fuse_submount_lookup *sl)
+{
+ if (!refcount_dec_and_test(&sl->count))
+ return;
+
+ fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
+ sl->forget = NULL;
+ kfree(sl);
+}
+
static void fuse_evict_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -132,6 +162,11 @@ static void fuse_evict_inode(struct inode *inode)
fi->nlookup);
fi->forget = NULL;
}
+
+ if (fi->submount_lookup) {
+ fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
+ fi->submount_lookup = NULL;
+ }
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(!list_empty(&fi->write_files));
@@ -330,6 +365,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
+static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
+ u64 nodeid)
+{
+ sl->nodeid = nodeid;
+ refcount_set(&sl->count, 1);
+}
+
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
struct fuse_conn *fc)
{
@@ -392,12 +434,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
*/
if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
S_ISDIR(attr->mode)) {
+ struct fuse_inode *fi;
+
inode = new_inode(sb);
if (!inode)
return NULL;
fuse_init_inode(inode, attr, fc);
- get_fuse_inode(inode)->nodeid = nodeid;
+ fi = get_fuse_inode(inode);
+ fi->nodeid = nodeid;
+ fi->submount_lookup = fuse_alloc_submount_lookup();
+ if (!fi->submount_lookup) {
+ iput(inode);
+ return NULL;
+ }
+ /* Records nodeid and sets fi->submount_lookup->count to 1 */
+ fuse_init_submount_lookup(fi->submount_lookup, nodeid);
inode->i_flags |= S_AUTOMOUNT;
goto done;
}
@@ -420,11 +472,11 @@ retry:
iput(inode);
goto retry;
}
-done:
fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
fi->nlookup++;
spin_unlock(&fi->lock);
+done:
fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
return inode;
@@ -1230,8 +1282,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->init_security = 1;
if (flags & FUSE_CREATE_SUPP_GROUP)
fc->create_supp_group = 1;
- if (flags & FUSE_DIRECT_IO_RELAX)
- fc->direct_io_relax = 1;
+ if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
+ fc->direct_io_allow_mmap = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -1278,7 +1330,7 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
- FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX;
+ FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
flags |= FUSE_MAP_ALIGNMENT;
@@ -1465,6 +1517,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
struct super_block *parent_sb = parent_fi->inode.i_sb;
struct fuse_attr root_attr;
struct inode *root;
+ struct fuse_submount_lookup *sl;
+ struct fuse_inode *fi;
fuse_sb_defaults(sb);
fm->sb = sb;
@@ -1487,12 +1541,27 @@ static int fuse_fill_super_submount(struct super_block *sb,
* its nlookup should not be incremented. fuse_iget() does
* that, though, so undo it here.
*/
- get_fuse_inode(root)->nlookup--;
+ fi = get_fuse_inode(root);
+ fi->nlookup--;
+
sb->s_d_op = &fuse_dentry_operations;
sb->s_root = d_make_root(root);
if (!sb->s_root)
return -ENOMEM;
+ /*
+ * Grab the parent's submount_lookup pointer and take a
+ * reference on the shared nlookup from the parent. This is to
+ * prevent the last forget for this nodeid from getting
+ * triggered until all users have finished with it.
+ */
+ sl = parent_fi->submount_lookup;
+ WARN_ON(!sl);
+ if (sl) {
+ refcount_inc(&sl->count);
+ fi->submount_lookup = sl;
+ }
+
return 0;
}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index db92a7202b342b..e7418d15fe3906 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -209,7 +209,7 @@
* - add FUSE_HAS_EXPIRE_ONLY
*
* 7.39
- * - add FUSE_DIRECT_IO_RELAX
+ * - add FUSE_DIRECT_IO_ALLOW_MMAP
* - add FUSE_STATX and related structures
*/
@@ -409,8 +409,7 @@ struct fuse_file_lock {
* FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
* symlink and mknod (single group that matches parent)
* FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
- * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
- * allow shared mmap
+ * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode.
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -449,7 +448,10 @@ struct fuse_file_lock {
#define FUSE_HAS_INODE_DAX (1ULL << 33)
#define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35)
-#define FUSE_DIRECT_IO_RELAX (1ULL << 36)
+#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36)
+
+/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
+#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
/**
* CUSE INIT request/reply flags