aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-02-21 09:34:34 +0100
committerChristian Brauner <brauner@kernel.org>2024-02-22 10:03:26 +0100
commit4af6ccb4694482d98903d1dfa16867d1cbaa6b6a (patch)
tree4c4365c561dc38bf59be8268fd7cf7b936489b8e
parentbae8bc46987ed8b9e8d00d0a87ac698a85d15904 (diff)
parent0e4a862174f2a8d1653a8a9cf0815020e1d3af24 (diff)
downloadlinux-4af6ccb4694482d98903d1dfa16867d1cbaa6b6a.tar.gz
Merge series 'Use Maple Trees for simple_offset utilities' of https://lore.kernel.org/r/170820083431.6328.16233178852085891453.stgit@91.116.238.104.host.secureserver.net
Pull simple offset series from Chuck Lever In an effort to address slab fragmentation issues reported a few months ago, I've replaced the use of xarrays for the directory offset map in "simple" file systems (including tmpfs). Thanks to Liam Howlett for helping me get this working with Maple Trees. * series 'Use Maple Trees for simple_offset utilities' of https://lore.kernel.org/r/170820083431.6328.16233178852085891453.stgit@91.116.238.104.host.secureserver.net: (6 commits) libfs: Convert simple directory offsets to use a Maple Tree test_maple_tree: testing the cyclic allocation maple_tree: Add mtree_alloc_cyclic() libfs: Add simple_offset_empty() libfs: Define a minimum directory offset libfs: Re-arrange locking in offset_iterate_dir() Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/libfs.c96
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/maple_tree.h7
-rw-r--r--lib/maple_tree.c93
-rw-r--r--lib/test_maple_tree.c44
-rw-r--r--mm/shmem.c4
6 files changed, 215 insertions, 35 deletions
diff --git a/fs/libfs.c b/fs/libfs.c
index 6fb8244b259e8..6182e2b030944 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -240,17 +240,22 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);
-static void offset_set(struct dentry *dentry, u32 offset)
+/* 0 is '.', 1 is '..', so always start with offset 2 or more */
+enum {
+ DIR_OFFSET_MIN = 2,
+};
+
+static void offset_set(struct dentry *dentry, long offset)
{
- dentry->d_fsdata = (void *)((uintptr_t)(offset));
+ dentry->d_fsdata = (void *)offset;
}
-static u32 dentry2offset(struct dentry *dentry)
+static long dentry2offset(struct dentry *dentry)
{
- return (u32)((uintptr_t)(dentry->d_fsdata));
+ return (long)dentry->d_fsdata;
}
-static struct lock_class_key simple_offset_xa_lock;
+static struct lock_class_key simple_offset_lock_class;
/**
* simple_offset_init - initialize an offset_ctx
@@ -259,11 +264,9 @@ static struct lock_class_key simple_offset_xa_lock;
*/
void simple_offset_init(struct offset_ctx *octx)
{
- xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
- lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
-
- /* 0 is '.', 1 is '..', so always start with offset 2 */
- octx->next_offset = 2;
+ mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
+ lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
+ octx->next_offset = DIR_OFFSET_MIN;
}
/**
@@ -271,20 +274,19 @@ void simple_offset_init(struct offset_ctx *octx)
* @octx: directory offset ctx to be updated
* @dentry: new dentry being added
*
- * Returns zero on success. @so_ctx and the dentry offset are updated.
+ * Returns zero on success. @octx and the dentry's offset are updated.
* Otherwise, a negative errno value is returned.
*/
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
{
- static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
- u32 offset;
+ unsigned long offset;
int ret;
if (dentry2offset(dentry) != 0)
return -EBUSY;
- ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
- &octx->next_offset, GFP_KERNEL);
+ ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
+ LONG_MAX, &octx->next_offset, GFP_KERNEL);
if (ret < 0)
return ret;
@@ -300,17 +302,49 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
*/
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
{
- u32 offset;
+ long offset;
offset = dentry2offset(dentry);
if (offset == 0)
return;
- xa_erase(&octx->xa, offset);
+ mtree_erase(&octx->mt, offset);
offset_set(dentry, 0);
}
/**
+ * simple_offset_empty - Check if a dentry can be unlinked
+ * @dentry: dentry to be tested
+ *
+ * Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
+ */
+int simple_offset_empty(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct offset_ctx *octx;
+ struct dentry *child;
+ unsigned long index;
+ int ret = 1;
+
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return ret;
+
+ index = DIR_OFFSET_MIN;
+ octx = inode->i_op->get_offset_ctx(inode);
+ mt_for_each(&octx->mt, child, index, LONG_MAX) {
+ spin_lock(&child->d_lock);
+ if (simple_positive(child)) {
+ spin_unlock(&child->d_lock);
+ ret = 0;
+ break;
+ }
+ spin_unlock(&child->d_lock);
+ }
+
+ return ret;
+}
+
+/**
* simple_offset_rename_exchange - exchange rename with directory offsets
* @old_dir: parent of dentry being moved
* @old_dentry: dentry being moved
@@ -327,8 +361,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
{
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
- u32 old_index = dentry2offset(old_dentry);
- u32 new_index = dentry2offset(new_dentry);
+ long old_index = dentry2offset(old_dentry);
+ long new_index = dentry2offset(new_dentry);
int ret;
simple_offset_remove(old_ctx, old_dentry);
@@ -354,9 +388,9 @@ int simple_offset_rename_exchange(struct inode *old_dir,
out_restore:
offset_set(old_dentry, old_index);
- xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+ mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
offset_set(new_dentry, new_index);
- xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+ mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
return ret;
}
@@ -369,7 +403,7 @@ out_restore:
*/
void simple_offset_destroy(struct offset_ctx *octx)
{
- xa_destroy(&octx->xa);
+ mtree_destroy(&octx->mt);
}
/**
@@ -399,15 +433,16 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
/* In this case, ->private_data is protected by f_pos_lock */
file->private_data = NULL;
- return vfs_setpos(file, offset, U32_MAX);
+ return vfs_setpos(file, offset, LONG_MAX);
}
-static struct dentry *offset_find_next(struct xa_state *xas)
+static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
{
+ MA_STATE(mas, &octx->mt, offset, offset);
struct dentry *child, *found = NULL;
rcu_read_lock();
- child = xas_next_entry(xas, U32_MAX);
+ child = mas_find(&mas, LONG_MAX);
if (!child)
goto out;
spin_lock(&child->d_lock);
@@ -421,8 +456,8 @@ out:
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
- u32 offset = dentry2offset(dentry);
struct inode *inode = d_inode(dentry);
+ long offset = dentry2offset(dentry);
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
@@ -430,12 +465,11 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
{
- struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
- XA_STATE(xas, &so_ctx->xa, ctx->pos);
+ struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dentry;
while (true) {
- dentry = offset_find_next(&xas);
+ dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
return ERR_PTR(-ENOENT);
@@ -444,8 +478,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
break;
}
+ ctx->pos = dentry2offset(dentry) + 1;
dput(dentry);
- ctx->pos = xas.xa_index + 1;
}
return NULL;
}
@@ -481,7 +515,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
return 0;
/* In this case, ->private_data is protected by f_pos_lock */
- if (ctx->pos == 2)
+ if (ctx->pos == DIR_OFFSET_MIN)
file->private_data = NULL;
else if (file->private_data == ERR_PTR(-ENOENT))
return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2e07cbbf92e3d..d15d808e8e316 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,6 +43,7 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#include <linux/maple_tree.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -3288,13 +3289,14 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
struct offset_ctx {
- struct xarray xa;
- u32 next_offset;
+ struct maple_tree mt;
+ unsigned long next_offset;
};
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index b3d63123b945b..a53ad4dabd7e8 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -171,6 +171,7 @@ enum maple_type {
#define MT_FLAGS_LOCK_IRQ 0x100
#define MT_FLAGS_LOCK_BH 0x200
#define MT_FLAGS_LOCK_EXTERN 0x300
+#define MT_FLAGS_ALLOC_WRAPPED 0x0800
#define MAPLE_HEIGHT_MAX 31
@@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first,
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
@@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max);
void *mas_find_rev(struct ma_state *mas, unsigned long min);
void *mas_find_range_rev(struct ma_state *mas, unsigned long max);
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
bool mas_nomem(struct ma_state *mas, gfp_t gfp);
void mas_pause(struct ma_state *mas);
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 6f241bb387992..af09702887272 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4290,6 +4290,56 @@ exists:
}
+/**
+ * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
+ * @mas: The maple state.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, or -EBUSY if there are no
+ * free entries.
+ */
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
+{
+ unsigned long min = range_lo;
+ int ret = 0;
+
+ range_lo = max(min, *next);
+ ret = mas_empty_area(mas, range_lo, range_hi, 1);
+ if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
+ mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
+ ret = 1;
+ }
+ if (ret < 0 && range_lo > min) {
+ ret = mas_empty_area(mas, min, range_hi, 1);
+ if (ret == 0)
+ ret = 1;
+ }
+ if (ret < 0)
+ return ret;
+
+ do {
+ mas_insert(mas, entry);
+ } while (mas_nomem(mas, gfp));
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ *startp = mas->index;
+ *next = *startp + 1;
+ if (*next == 0)
+ mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
+
+ return ret;
+}
+EXPORT_SYMBOL(mas_alloc_cyclic);
+
static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
{
retry:
@@ -6443,6 +6493,49 @@ unlock:
}
EXPORT_SYMBOL(mtree_alloc_range);
+/**
+ * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
+ * @mt: The maple tree.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Finds an empty entry in @mt after @next, stores the new index into
+ * the @id pointer, stores the entry at that index, then updates @next.
+ *
+ * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
+ *
+ * Context: Any context. Takes and releases the mt.lock. May sleep if
+ * the @gfp flags permit.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
+ * free entries.
+ */
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
+{
+ int ret;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ if (!mt_is_alloc(mt))
+ return -EINVAL;
+ if (WARN_ON_ONCE(mt_is_reserved(entry)))
+ return -EINVAL;
+ mtree_lock(mt);
+ ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
+ next, gfp);
+ mtree_unlock(mt);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_alloc_cyclic);
+
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp)
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 29185ac5c727f..399380db449cd 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -3599,6 +3599,45 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
mas_unlock(&mas);
}
+static noinline void __init alloc_cyclic_testing(struct maple_tree *mt)
+{
+ unsigned long location;
+ unsigned long next;
+ int ret = 0;
+ MA_STATE(mas, mt, 0, 0);
+
+ next = 0;
+ mtree_lock(mt);
+ for (int i = 0; i < 100; i++) {
+ mas_alloc_cyclic(&mas, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MAS_BUG_ON(&mas, i != location - 2);
+ MAS_BUG_ON(&mas, mas.index != location);
+ MAS_BUG_ON(&mas, mas.last != location);
+ MAS_BUG_ON(&mas, i != next - 3);
+ }
+
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+ next = 0;
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (int i = 0; i < 100; i++) {
+ mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, i != location - 2);
+ MT_BUG_ON(mt, i != next - 3);
+ MT_BUG_ON(mt, mtree_load(mt, location) != mt);
+ }
+
+ mtree_destroy(mt);
+ /* Overflow test */
+ next = ULONG_MAX - 1;
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 0);
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 0);
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 1);
+}
+
static DEFINE_MTREE(tree);
static int __init maple_tree_seed(void)
{
@@ -3880,6 +3919,11 @@ static int __init maple_tree_seed(void)
check_state_handling(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ alloc_cyclic_testing(&tree);
+ mtree_destroy(&tree);
+
+
#if defined(BENCH)
skip:
#endif
diff --git a/mm/shmem.c b/mm/shmem.c
index d7c84ff621860..6fed524343cb2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3374,7 +3374,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
- if (!simple_empty(dentry))
+ if (!simple_offset_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@@ -3431,7 +3431,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
- if (!simple_empty(new_dentry))
+ if (!simple_offset_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {