aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-11-02 14:39:01 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-11-02 14:39:01 -0400
commit0a08ddf78c9cf4b6671ba64b049c37da64233f4f (patch)
tree5c373e315fd49da181fdf0b06ff6ba0fffe5dccf
parentd320a4e927fd706b34c714b77130965a385ea4fb (diff)
downloadbcachefs-tools-0a08ddf78c9cf4b6671ba64b049c37da64233f4f.tar.gz
Update bcachefs sources to b9bd69421f73 bcachefs: x-macro-ify inode flags enum
-rw-r--r--.bcachefs_revision2
-rw-r--r--include/linux/generic-radix-tree.h4
-rw-r--r--libbcachefs/alloc_background.c11
-rw-r--r--libbcachefs/alloc_background.h1
-rw-r--r--libbcachefs/alloc_foreground.c38
-rw-r--r--libbcachefs/bcachefs_format.h56
-rw-r--r--libbcachefs/btree_iter.c51
-rw-r--r--libbcachefs/btree_iter.h4
-rw-r--r--libbcachefs/btree_locking.c6
-rw-r--r--libbcachefs/btree_trans_commit.c6
-rw-r--r--libbcachefs/btree_types.h1
-rw-r--r--libbcachefs/btree_update_interior.c2
-rw-r--r--libbcachefs/btree_update_interior.h2
-rw-r--r--libbcachefs/darray.h6
-rw-r--r--libbcachefs/fs-common.c2
-rw-r--r--libbcachefs/fs-ioctl.c4
-rw-r--r--libbcachefs/fs-ioctl.h28
-rw-r--r--libbcachefs/fs.c24
-rw-r--r--libbcachefs/fsck.c34
-rw-r--r--libbcachefs/inode.c36
-rw-r--r--libbcachefs/inode.h6
-rw-r--r--libbcachefs/io_write.c2
-rw-r--r--libbcachefs/move.c13
-rw-r--r--libbcachefs/move.h1
-rw-r--r--libbcachefs/movinggc.c45
-rw-r--r--libbcachefs/rebalance.c12
-rw-r--r--libbcachefs/recovery.c2
-rw-r--r--libbcachefs/reflink.c2
-rw-r--r--libbcachefs/sb-errors.c5
-rw-r--r--libbcachefs/sb-members.c19
-rw-r--r--libbcachefs/sb-members.h11
-rw-r--r--libbcachefs/six.c113
-rw-r--r--libbcachefs/six.h2
-rw-r--r--libbcachefs/super-io.c2
-rw-r--r--libbcachefs/super.c60
-rw-r--r--libbcachefs/util.c18
-rw-r--r--libbcachefs/util.h21
-rw-r--r--linux/closure.c5
38 files changed, 348 insertions, 309 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 0476cc0e..f7786253 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-6628827a87075d3f807c974045ed293ac1e8965b
+b9bd69421f7364ca4ff11c827fd0e171a8b826ea
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index c74b7376..84741316 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -191,8 +191,8 @@ void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
size_t, size_t);
/**
- * genradix_iter_peek - get first entry at or below iterator's current
- * position
+ * genradix_iter_peek_prev - get first entry at or below iterator's current
+ * position
* @_iter: a genradix_iter
* @_radix: genradix being iterated over
*
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index c342ec3b..bcfae916 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -2085,6 +2085,17 @@ void bch2_recalc_capacity(struct bch_fs *c)
closure_wake_up(&c->freelist_wait);
}
+u64 bch2_min_rw_member_capacity(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i;
+ u64 ret = U64_MAX;
+
+ for_each_rw_member(ca, c, i)
+ ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
+ return ret;
+}
+
static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
struct open_bucket *ob;
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index e1ce38ef..73faf99a 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
int bch2_fs_freespace_init(struct bch_fs *);
void bch2_recalc_capacity(struct bch_fs *);
+u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 3bc4abd3..b85c7765 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct bucket_alloc_state *s,
struct closure *cl)
{
- struct btree_iter iter;
- struct bkey_s_c k;
+ struct btree_iter iter, citer;
+ struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
- u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
- u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
+ u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+ u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
+ u64 alloc_cursor = alloc_start;
int ret;
+
+ /*
+ * Scan with an uncached iterator to avoid polluting the key cache. An
+ * uncached iter will return a cached key if one exists, but if not
+ * there is no other underlying protection for the associated key cache
+ * slot. To avoid racing bucket allocations, look up the cached key slot
+ * of any likely allocation candidate before attempting to proceed with
+ * the allocation. This provides proper exclusion on the associated
+ * bucket.
+ */
again:
for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
BTREE_ITER_SLOTS, k, ret) {
@@ -419,25 +430,38 @@ again:
continue;
a = bch2_alloc_to_v4(k, &a_convert);
-
if (a->data_type != BCH_DATA_free)
continue;
+ /* now check the cached key to serialize concurrent allocs of the bucket */
+ ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
+ ret = bkey_err(ck);
+ if (ret)
+ break;
+
+ a = bch2_alloc_to_v4(ck, &a_convert);
+ if (a->data_type != BCH_DATA_free)
+ goto next;
+
s->buckets_seen++;
ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+next:
+ citer.path->preserve = false;
+ bch2_trans_iter_exit(trans, &citer);
if (ob)
break;
}
bch2_trans_iter_exit(trans, &iter);
+ alloc_cursor = iter.pos.offset;
ca->alloc_cursor = alloc_cursor;
if (!ob && ret)
ob = ERR_PTR(ret);
- if (!ob && alloc_cursor > alloc_start) {
- alloc_cursor = alloc_start;
+ if (!ob && alloc_start > first_bucket) {
+ alloc_cursor = alloc_start = first_bucket;
goto again;
}
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 29b000c6..7a1c2440 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -824,34 +824,30 @@ enum inode_opt_id {
Inode_opt_nr,
};
-enum {
- /*
- * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
- * flags)
- */
- __BCH_INODE_SYNC = 0,
- __BCH_INODE_IMMUTABLE = 1,
- __BCH_INODE_APPEND = 2,
- __BCH_INODE_NODUMP = 3,
- __BCH_INODE_NOATIME = 4,
-
- __BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */
- __BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */
- __BCH_INODE_UNLINKED = 7,
- __BCH_INODE_BACKPTR_UNTRUSTED = 8,
-
- /* bits 20+ reserved for packed fields below: */
-};
-
-#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
-#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
-#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND)
-#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP)
-#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
-#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
+#define BCH_INODE_FLAGS() \
+ x(sync, 0) \
+ x(immutable, 1) \
+ x(append, 2) \
+ x(nodump, 3) \
+ x(noatime, 4) \
+ x(i_size_dirty, 5) \
+ x(i_sectors_dirty, 6) \
+ x(unlinked, 7) \
+ x(backptr_untrusted, 8)
+
+/* bits 20+ reserved for packed fields below: */
+
+enum bch_inode_flags {
+#define x(t, n) BCH_INODE_##t = 1U << n,
+ BCH_INODE_FLAGS()
+#undef x
+};
+
+enum __bch_inode_flags {
+#define x(t, n) __BCH_INODE_##t = n,
+ BCH_INODE_FLAGS()
+#undef x
+};
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
@@ -1617,9 +1613,7 @@ struct journal_seq_blacklist_entry {
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
-
- struct journal_seq_blacklist_entry start[0];
- __u64 _data[];
+ struct journal_seq_blacklist_entry start[];
};
struct bch_sb_field_errors {
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index feba9a31..ba392eb0 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
if (unlikely(ret))
goto out;
+ if (unlikely(!trans->srcu_held))
+ bch2_trans_srcu_lock(trans);
+
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
@@ -2830,18 +2833,35 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p;
}
-static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+static inline void check_srcu_held_too_long(struct btree_trans *trans)
{
- struct bch_fs *c = trans->c;
- struct btree_path *path;
+ WARN(time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+ "btree trans held srcu lock (delaying memory reclaim) by more than 10 seconds");
+}
- trans_for_each_path(trans, path)
- if (path->cached && !btree_node_locked(path, 0))
- path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+void bch2_trans_srcu_unlock(struct btree_trans *trans)
+{
+ if (trans->srcu_held) {
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
- srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
- trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
- trans->srcu_lock_time = jiffies;
+ trans_for_each_path(trans, path)
+ if (path->cached && !btree_node_locked(path, 0))
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+ check_srcu_held_too_long(trans);
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ trans->srcu_held = false;
+ }
+}
+
+void bch2_trans_srcu_lock(struct btree_trans *trans)
+{
+ if (!trans->srcu_held) {
+ trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
+ trans->srcu_held = true;
+ }
}
/**
@@ -2895,8 +2915,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
trans->last_begin_time = now;
- if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
- bch2_trans_reset_srcu_lock(trans);
+ if (unlikely(trans->srcu_held &&
+ time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+ bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_;
if (trans->restarted) {
@@ -2983,8 +3004,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->wb_updates_size = s->wb_updates_size;
}
- trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
+ trans->srcu_held = true;
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
struct btree_trans *pos;
@@ -3061,7 +3083,10 @@ void bch2_trans_put(struct btree_trans *trans)
check_btree_paths_leaked(trans);
- srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ if (trans->srcu_held) {
+ check_srcu_held_too_long(trans);
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ }
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 70759ee3..5e103f51 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
int bch2_trans_relock(struct btree_trans *);
int bch2_trans_relock_notrace(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
+void bch2_trans_unlock_long(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *);
static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
__bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \
KEY_TYPE_##_type, sizeof(*_val), _val)
+void bch2_trans_srcu_unlock(struct btree_trans *);
+void bch2_trans_srcu_lock(struct btree_trans *);
+
u32 bch2_trans_begin(struct btree_trans *);
/*
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index ba263302..c4266835 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -759,6 +759,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
bch2_assert_btree_nodes_not_locked();
}
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+ bch2_trans_unlock(trans);
+ bch2_trans_srcu_unlock(trans);
+}
+
bool bch2_trans_locked(struct btree_trans *trans)
{
struct btree_path *path;
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 8140b6e6..32693f7c 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -681,7 +681,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,
i->old_k.u64s);
- bkey_reassemble(&entry->start[0],
+ bkey_reassemble((struct bkey_i *) entry->start,
(struct bkey_s_c) { &i->old_k, i->old_v });
}
@@ -689,7 +689,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_btree_keys,
i->btree_id, i->level,
i->k->k.u64s);
- bkey_copy(&entry->start[0], i->k);
+ bkey_copy((struct bkey_i *) entry->start, i->k);
}
trans_for_each_wb_update(trans, wb) {
@@ -697,7 +697,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_btree_keys,
wb->btree, 0,
wb->k.k.u64s);
- bkey_copy(&entry->start[0], &wb->k);
+ bkey_copy((struct bkey_i *) entry->start, &wb->k);
}
if (trans->journal_seq)
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index cbcb04a4..4b9cc61a 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -432,6 +432,7 @@ struct btree_trans {
u8 nr_updates;
u8 nr_wb_updates;
u8 wb_updates_size;
+ bool srcu_held:1;
bool used_mempool:1;
bool in_traverse_all:1;
bool paths_sorted:1;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index d029e034..89ada89e 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -2411,7 +2411,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
r->level = entry->level;
r->alive = true;
- bkey_copy(&r->key, &entry->start[0]);
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
mutex_unlock(&c->btree_root_lock);
}
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 5e0a467f..d92b3cf5 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
struct btree_node_entry *bne = max(write_block(b),
(void *) btree_bkey_last(b, bset_tree_last(b)));
ssize_t remaining_space =
- __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]);
+ __bch_btree_u64s_remaining(c, b, bne->keys.start);
if (unlikely(bset_written(b, bset(b, t)))) {
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h
index 114f86b4..87b4b2d1 100644
--- a/libbcachefs/darray.h
+++ b/libbcachefs/darray.h
@@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
_ret; \
})
+#define darray_remove_item(_d, _pos) \
+ array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
+
#define darray_for_each(_d, _i) \
for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++)
+#define darray_for_each_reverse(_d, _i) \
+ for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
+
#define darray_init(_d) \
do { \
(_d)->data = NULL; \
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index bb530544..4496cf91 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans,
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
if (flags & BCH_CREATE_TMPFILE)
- new_inode->bi_flags |= BCH_INODE_UNLINKED;
+ new_inode->bi_flags |= BCH_INODE_unlinked;
ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
if (ret)
diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c
index 6040bd3f..5a39bcb5 100644
--- a/libbcachefs/fs-ioctl.c
+++ b/libbcachefs/fs-ioctl.c
@@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
unsigned newflags = s->flags;
unsigned oldflags = bi->bi_flags & s->mask;
- if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
+ if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
if (!S_ISREG(bi->bi_mode) &&
!S_ISDIR(bi->bi_mode) &&
- (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
+ (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
return -EINVAL;
if (s->set_projinherit) {
diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/fs-ioctl.h
index 54a9c21a..d30f9bb0 100644
--- a/libbcachefs/fs-ioctl.h
+++ b/libbcachefs/fs-ioctl.h
@@ -6,28 +6,28 @@
/* bcachefs inode flags -> vfs inode flags: */
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
- [__BCH_INODE_SYNC] = S_SYNC,
- [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
- [__BCH_INODE_APPEND] = S_APPEND,
- [__BCH_INODE_NOATIME] = S_NOATIME,
+ [__BCH_INODE_sync] = S_SYNC,
+ [__BCH_INODE_immutable] = S_IMMUTABLE,
+ [__BCH_INODE_append] = S_APPEND,
+ [__BCH_INODE_noatime] = S_NOATIME,
};
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
- [__BCH_INODE_SYNC] = FS_SYNC_FL,
- [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
- [__BCH_INODE_APPEND] = FS_APPEND_FL,
- [__BCH_INODE_NODUMP] = FS_NODUMP_FL,
- [__BCH_INODE_NOATIME] = FS_NOATIME_FL,
+ [__BCH_INODE_sync] = FS_SYNC_FL,
+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+ [__BCH_INODE_append] = FS_APPEND_FL,
+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
- [__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
- [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
- [__BCH_INODE_APPEND] = FS_XFLAG_APPEND,
- [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP,
- [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME,
+ [__BCH_INODE_sync] = FS_XFLAG_SYNC,
+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
+ [__BCH_INODE_append] = FS_XFLAG_APPEND,
+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
//[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
};
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 89759e6a..693f3474 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -66,11 +66,11 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
inode->v.i_mode = bi->bi_mode;
if (fields & ATTR_ATIME)
- inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
+ inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
if (fields & ATTR_MTIME)
- inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
+ inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
if (fields & ATTR_CTIME)
- inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
+ inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
inode->ei_inode = *bi;
@@ -753,9 +753,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->gid = inode->v.i_gid;
stat->rdev = inode->v.i_rdev;
stat->size = i_size_read(&inode->v);
- stat->atime = inode->v.i_atime;
- stat->mtime = inode->v.i_mtime;
- stat->ctime = inode->v.i_ctime;
+ stat->atime = inode_get_atime(&inode->v);
+ stat->mtime = inode_get_mtime(&inode->v);
+ stat->ctime = inode_get_ctime(&inode->v);
stat->blksize = block_bytes(c);
stat->blocks = inode->v.i_blocks;
@@ -764,15 +764,15 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
}
- if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
+ if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
stat->attributes |= STATX_ATTR_IMMUTABLE;
stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
- if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
+ if (inode->ei_inode.bi_flags & BCH_INODE_append)
stat->attributes |= STATX_ATTR_APPEND;
stat->attributes_mask |= STATX_ATTR_APPEND;
- if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
+ if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= STATX_ATTR_NODUMP;
@@ -1418,9 +1418,9 @@ static int inode_update_times_fn(struct btree_trans *trans,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime);
- bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime);
- bi->bi_ctime = timespec_to_bch2_time(c, inode->v.i_ctime);
+ bi->bi_atime = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
+ bi->bi_mtime = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
+ bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
return 0;
}
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 0e470ebd..9f3e9bd3 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -854,9 +854,9 @@ static int check_inode(struct btree_trans *trans,
BUG_ON(bch2_inode_unpack(k, &u));
if (!full &&
- !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
- BCH_INODE_I_SECTORS_DIRTY|
- BCH_INODE_UNLINKED)))
+ !(u.bi_flags & (BCH_INODE_i_size_dirty|
+ BCH_INODE_i_sectors_dirty|
+ BCH_INODE_unlinked)))
return 0;
if (prev->bi_inum != u.bi_inum)
@@ -870,7 +870,7 @@ static int check_inode(struct btree_trans *trans,
return -EINVAL;
}
- if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) &&
+ if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
struct bpos new_min_pos;
@@ -878,7 +878,7 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
- u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED;
+ u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;
ret = __write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
@@ -890,7 +890,7 @@ static int check_inode(struct btree_trans *trans,
return 0;
}
- if (u.bi_flags & BCH_INODE_UNLINKED &&
+ if (u.bi_flags & BCH_INODE_unlinked &&
(!c->sb.clean ||
fsck_err(c, inode_unlinked_but_clean,
"filesystem marked clean, but inode %llu unlinked",
@@ -903,7 +903,7 @@ static int check_inode(struct btree_trans *trans,
return ret;
}
- if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
+ if (u.bi_flags & BCH_INODE_i_size_dirty &&
(!c->sb.clean ||
fsck_err(c, inode_i_size_dirty_but_clean,
"filesystem marked clean, but inode %llu has i_size dirty",
@@ -930,13 +930,13 @@ static int check_inode(struct btree_trans *trans,
* We truncated without our normal sector accounting hook, just
* make sure we recalculate it:
*/
- u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
+ u.bi_flags |= BCH_INODE_i_sectors_dirty;
- u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+ u.bi_flags &= ~BCH_INODE_i_size_dirty;
do_update = true;
}
- if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
+ if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
(!c->sb.clean ||
fsck_err(c, inode_i_sectors_dirty_but_clean,
"filesystem marked clean, but inode %llu has i_sectors dirty",
@@ -953,14 +953,14 @@ static int check_inode(struct btree_trans *trans,
}
u.bi_sectors = sectors;
- u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
+ u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
do_update = true;
}
- if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
+ if (u.bi_flags & BCH_INODE_backptr_untrusted) {
u.bi_dir = 0;
u.bi_dir_offset = 0;
- u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
+ u.bi_flags &= ~BCH_INODE_backptr_untrusted;
do_update = true;
}
@@ -1065,7 +1065,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
return -BCH_ERR_internal_fsck_err;
}
- if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY),
+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
c, inode_i_sectors_wrong,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
w->last_pos.inode, i->snapshot,
@@ -1405,7 +1405,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
continue;
if (k.k->type != KEY_TYPE_whiteout) {
- if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
!bkey_extent_is_reservation(k),
c, extent_past_end_of_inode,
@@ -1588,7 +1588,7 @@ static int check_dirent_target(struct btree_trans *trans,
"inode %llu type %s has multiple links but i_nlink 0",
target->bi_inum, bch2_d_types[d.v->d_type])) {
target->bi_nlink++;
- target->bi_flags &= ~BCH_INODE_UNLINKED;
+ target->bi_flags &= ~BCH_INODE_unlinked;
ret = __write_inode(trans, target, target_snapshot);
if (ret)
@@ -2160,7 +2160,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
break;
}
- if (u.bi_flags & BCH_INODE_UNLINKED)
+ if (u.bi_flags & BCH_INODE_unlinked)
continue;
ret = check_path(trans, &path, &u, iter.pos.snapshot);
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 925d1b7f..8b3c675e 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -20,13 +20,18 @@
#include <asm/unaligned.h>
-const char * const bch2_inode_opts[] = {
#define x(name, ...) #name,
+const char * const bch2_inode_opts[] = {
BCH_INODE_OPTS()
-#undef x
NULL,
};
+static const char * const bch2_inode_flag_strs[] = {
+ BCH_INODE_FLAGS()
+ NULL
+};
+#undef x
+
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
static int inode_decode_field(const u8 *in, const u8 *end,
@@ -425,7 +430,7 @@ static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct prin
inode_compression_type_invalid,
"invalid compression opt %u", unpacked.bi_compression - 1);
- bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+ bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
unpacked.bi_nlink != 0, c, err,
inode_unlinked_but_nlink_nonzero,
"flagged as unlinked but bi_nlink != 0");
@@ -499,15 +504,20 @@ fsck_err:
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
- prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
- inode->bi_mode, inode->bi_flags,
+ prt_printf(out, "mode=%o ", inode->bi_mode);
+
+ prt_str(out, "flags=");
+ prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
+ prt_printf(out, " (%x)", inode->bi_flags);
+
+ prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
inode->bi_journal_seq,
inode->bi_size,
inode->bi_sectors,
inode->bi_version);
#define x(_name, _bits) \
- prt_printf(out, " "#_name " %llu", (u64) inode->_name);
+ prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
BCH_INODE_FIELDS_v3()
#undef x
}
@@ -546,7 +556,7 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k)
static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
{
- return bkey_inode_flags(k) & BCH_INODE_UNLINKED;
+ return bkey_inode_flags(k) & BCH_INODE_unlinked;
}
int bch2_trans_mark_inode(struct btree_trans *trans,
@@ -927,8 +937,8 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
{
- if (bi->bi_flags & BCH_INODE_UNLINKED)
- bi->bi_flags &= ~BCH_INODE_UNLINKED;
+ if (bi->bi_flags & BCH_INODE_unlinked)
+ bi->bi_flags &= ~BCH_INODE_unlinked;
else {
if (bi->bi_nlink == U32_MAX)
return -EINVAL;
@@ -941,13 +951,13 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
{
- if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) {
+ if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) {
bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
bi->bi_inum);
return;
}
- if (bi->bi_flags & BCH_INODE_UNLINKED) {
+ if (bi->bi_flags & BCH_INODE_unlinked) {
bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
return;
}
@@ -955,7 +965,7 @@ void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *
if (bi->bi_nlink)
bi->bi_nlink--;
else
- bi->bi_flags |= BCH_INODE_UNLINKED;
+ bi->bi_flags |= BCH_INODE_unlinked;
}
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
@@ -1094,7 +1104,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos)
pos.offset, pos.snapshot))
goto delete;
- if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c,
+ if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
deleted_inode_not_unlinked,
"non-deleted inode %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 74c62e6c..5068ba9c 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -177,7 +177,7 @@ static inline unsigned nlink_bias(umode_t mode)
static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
{
- return bi->bi_flags & BCH_INODE_UNLINKED
+ return bi->bi_flags & BCH_INODE_unlinked
? 0
: bi->bi_nlink + nlink_bias(bi->bi_mode);
}
@@ -187,10 +187,10 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
{
if (nlink) {
bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
- bi->bi_flags &= ~BCH_INODE_UNLINKED;
+ bi->bi_flags &= ~BCH_INODE_unlinked;
} else {
bi->bi_nlink = 0;
- bi->bi_flags |= BCH_INODE_UNLINKED;
+ bi->bi_flags |= BCH_INODE_unlinked;
}
}
diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c
index 613f3843..fbfc42ff 100644
--- a/libbcachefs/io_write.c
+++ b/libbcachefs/io_write.c
@@ -223,7 +223,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
inode = bkey_i_to_inode_v3(k);
- if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
+ if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
new_i_size > le64_to_cpu(inode->v.bi_size)) {
inode->v.bi_size = cpu_to_le64(new_i_size);
inode_update_flags = 0;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 1b15b010..ab749bf2 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -147,9 +147,8 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
{
struct moving_io *io;
- bch2_trans_unlock(ctxt->trans);
-
while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
+ bch2_trans_unlock_long(ctxt->trans);
list_del(&io->read_list);
move_write(io);
}
@@ -485,8 +484,8 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
struct bch_fs *c = ctxt->trans->c;
u64 delay;
- if (ctxt->wait_on_copygc) {
- bch2_trans_unlock(ctxt->trans);
+ if (ctxt->wait_on_copygc && !c->copygc_running) {
+ bch2_trans_unlock_long(ctxt->trans);
wait_event_killable(c->copygc_running_wq,
!c->copygc_running ||
kthread_should_stop());
@@ -495,8 +494,12 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
do {
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
+
if (delay) {
- bch2_trans_unlock(ctxt->trans);
+ if (delay > HZ / 10)
+ bch2_trans_unlock_long(ctxt->trans);
+ else
+ bch2_trans_unlock(ctxt->trans);
set_current_state(TASK_INTERRUPTIBLE);
}
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 1b1e8678..07cf9d42 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -45,6 +45,7 @@ do { \
\
if (_cond) \
break; \
+ bch2_trans_unlock_long((_ctxt)->trans); \
__wait_event((_ctxt)->wait, \
bch2_moving_ctxt_next_pending_write(_ctxt) || \
(cond_finished = (_cond))); \
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index f73b9b7f..0158c7aa 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -128,7 +128,7 @@ static void move_buckets_wait(struct moving_context *ctxt,
kfree(i);
}
- bch2_trans_unlock(ctxt->trans);
+ bch2_trans_unlock_long(ctxt->trans);
}
static bool bucket_in_flight(struct buckets_in_flight *list,
@@ -188,7 +188,8 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
noinline
static int bch2_copygc(struct moving_context *ctxt,
- struct buckets_in_flight *buckets_in_flight)
+ struct buckets_in_flight *buckets_in_flight,
+ bool *did_work)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
@@ -224,6 +225,8 @@ static int bch2_copygc(struct moving_context *ctxt,
f->bucket.k.gen, data_opts);
if (ret)
goto err;
+
+ *did_work = true;
}
err:
darray_exit(&buckets);
@@ -302,14 +305,16 @@ static int bch2_copygc_thread(void *arg)
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
- struct buckets_in_flight buckets;
+ struct buckets_in_flight *buckets;
u64 last, wait;
int ret = 0;
- memset(&buckets, 0, sizeof(buckets));
-
- ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
+ buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL);
+ if (!buckets)
+ return -ENOMEM;
+ ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
if (ret) {
+ kfree(buckets);
bch_err_msg(c, ret, "allocating copygc buckets in flight");
return ret;
}
@@ -322,16 +327,18 @@ static int bch2_copygc_thread(void *arg)
false);
while (!ret && !kthread_should_stop()) {
- bch2_trans_unlock(ctxt.trans);
+ bool did_work = false;
+
+ bch2_trans_unlock_long(ctxt.trans);
cond_resched();
if (!c->copy_gc_enabled) {
- move_buckets_wait(&ctxt, &buckets, true);
+ move_buckets_wait(&ctxt, buckets, true);
kthread_wait_freezable(c->copy_gc_enabled);
}
if (unlikely(freezing(current))) {
- move_buckets_wait(&ctxt, &buckets, true);
+ move_buckets_wait(&ctxt, buckets, true);
__refrigerator(false);
continue;
}
@@ -342,7 +349,7 @@ static int bch2_copygc_thread(void *arg)
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
- move_buckets_wait(&ctxt, &buckets, true);
+ move_buckets_wait(&ctxt, buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
@@ -352,14 +359,26 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
c->copygc_running = true;
- ret = bch2_copygc(&ctxt, &buckets);
+ ret = bch2_copygc(&ctxt, buckets, &did_work);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
+
+ if (!wait && !did_work) {
+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
+
+ if (min_member_capacity == U64_MAX)
+ min_member_capacity = 128 * 2048;
+
+ bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
+ MAX_SCHEDULE_TIMEOUT);
+ }
}
- move_buckets_wait(&ctxt, &buckets, true);
- rhashtable_destroy(&buckets.table);
+ move_buckets_wait(&ctxt, buckets, true);
+
+ rhashtable_destroy(&buckets->table);
+ kfree(buckets);
bch2_moving_ctxt_exit(&ctxt);
bch2_move_stats_exit(&move_stats, c);
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 6ee4d2e0..3319190b 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
@@ -282,15 +283,12 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
static void rebalance_wait(struct bch_fs *c)
{
struct bch_fs_rebalance *r = &c->rebalance;
- struct bch_dev *ca;
struct io_clock *clock = &c->io_clock[WRITE];
u64 now = atomic64_read(&clock->now);
- u64 min_member_capacity = 128 * 2048;
- unsigned i;
+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
- for_each_rw_member(ca, c, i)
- min_member_capacity = min(min_member_capacity,
- ca->mi.nbuckets * ca->mi.bucket_size);
+ if (min_member_capacity == U64_MAX)
+ min_member_capacity = 128 * 2048;
r->wait_iotime_end = now + (min_member_capacity >> 6);
@@ -350,7 +348,7 @@ static int do_rebalance(struct moving_context *ctxt)
!kthread_should_stop() &&
!atomic64_read(&r->work_stats.sectors_seen) &&
!atomic64_read(&r->scan_stats.sectors_seen)) {
- bch2_trans_unlock(trans);
+ bch2_trans_unlock_long(trans);
rebalance_wait(c);
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index f73338f3..9600b808 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -226,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
if (entry->u64s) {
r->level = entry->level;
- bkey_copy(&r->key, &entry->start[0]);
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
r->error = 0;
} else {
r->error = -EIO;
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index eb31df60..6e1bfe9f 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -255,7 +255,7 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bpos dst_end = dst_start, src_end = src_start;
struct bch_io_opts opts;
struct bpos src_want;
- u64 dst_done;
+ u64 dst_done = 0;
u32 dst_snapshot, src_snapshot;
int ret = 0, ret2 = 0;
diff --git a/libbcachefs/sb-errors.c b/libbcachefs/sb-errors.c
index 3d66f15a..9215d414 100644
--- a/libbcachefs/sb-errors.c
+++ b/libbcachefs/sb-errors.c
@@ -61,7 +61,6 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
{
struct bch_sb_field_errors *e = field_to_type(f, errors);
unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
- u64 now = ktime_get_real_seconds();
if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);
@@ -71,9 +70,7 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
prt_tab(out);
prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
prt_tab(out);
- bch2_pr_time_units(out, (now - le64_to_cpu(e->entries[i].last_error_time)) *
- NSEC_PER_SEC);
- prt_str(out, " ago");
+ bch2_prt_date_seconds(out, le64_to_cpu(e->entries[i].last_error_time));
prt_newline(out);
}
}
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index ab5de12e..6a7e20de 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -21,19 +21,14 @@ char * const bch2_member_error_strs[] = {
/* Code for bch_sb_field_members_v1: */
-static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i)
-{
- return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
-}
-
struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
{
- return members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
+ return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
}
static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
{
- struct bch_member ret, *p = members_v2_get_mut(mi, i);
+ struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
memset(&ret, 0, sizeof(ret));
memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
return ret;
@@ -75,7 +70,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c)
for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
- memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
+ memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
memset(dst + le16_to_cpu(mi->member_bytes),
0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
}
@@ -118,7 +113,7 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
- memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
+ memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
return 0;
}
@@ -235,7 +230,7 @@ static void member_to_text(struct printbuf *out,
prt_printf(out, "Last mount:");
prt_tab(out);
if (m.last_mount)
- pr_time(out, le64_to_cpu(m.last_mount));
+ bch2_prt_date_seconds(out, le64_to_cpu(m.last_mount));
else
prt_printf(out, "(never)");
prt_newline(out);
@@ -332,7 +327,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb,
struct printbuf *err)
{
struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
- size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) -
+ size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
(void *) mi;
if (mi_bytes > vstruct_bytes(&mi->field)) {
@@ -363,7 +358,7 @@ void bch2_sb_members_from_cpu(struct bch_fs *c)
rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL) {
- struct bch_member *m = members_v2_get_mut(mi, i);
+ struct bch_member *m = __bch2_members_v2_get_mut(mi, i);
for (e = 0; e < BCH_MEMBER_ERROR_NR; e++)
m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index 1583e80a..03613e3e 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -4,6 +4,12 @@
extern char * const bch2_member_error_strs[];
+static inline struct bch_member *
+__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i)
+{
+ return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
+}
+
int bch2_sb_members_v2_init(struct bch_fs *c);
int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
@@ -186,11 +192,10 @@ static inline bool bch2_member_exists(struct bch_member *m)
return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
}
-static inline bool bch2_dev_exists(struct bch_sb *sb,
- unsigned dev)
+static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev)
{
if (dev < sb->nr_devices) {
- struct bch_member m = bch2_sb_member_get(sb, dev);
+ struct bch_member m = bch2_sb_member_get(sb, dev);
return bch2_member_exists(&m);
}
return false;
diff --git a/libbcachefs/six.c b/libbcachefs/six.c
index 458a1de0..d22826ca 100644
--- a/libbcachefs/six.c
+++ b/libbcachefs/six.c
@@ -323,99 +323,55 @@ EXPORT_SYMBOL_GPL(six_relock_ip);
#ifdef CONFIG_LOCK_SPIN_ON_OWNER
-static inline bool six_can_spin_on_owner(struct six_lock *lock)
+static inline bool six_owner_running(struct six_lock *lock)
{
- struct task_struct *owner;
- bool ret;
-
- if (need_resched())
- return false;
-
+ /*
+ * When there's no owner, we might have preempted between the owner
+ * acquiring the lock and setting the owner field. If we're an RT task
+ * that will live-lock because we won't let the owner complete.
+ */
rcu_read_lock();
- owner = READ_ONCE(lock->owner);
- ret = !owner || owner_on_cpu(owner);
+ struct task_struct *owner = READ_ONCE(lock->owner);
+ bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
rcu_read_unlock();
return ret;
}
-static inline bool six_spin_on_owner(struct six_lock *lock,
- struct task_struct *owner,
- u64 end_time)
+static inline bool six_optimistic_spin(struct six_lock *lock,
+ struct six_lock_waiter *wait,
+ enum six_lock_type type)
{
- bool ret = true;
unsigned loop = 0;
-
- rcu_read_lock();
- while (lock->owner == owner) {
- /*
- * Ensure we emit the owner->on_cpu, dereference _after_
- * checking lock->owner still matches owner. If that fails,
- * owner might point to freed memory. If it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
- */
- barrier();
-
- if (!owner_on_cpu(owner) || need_resched()) {
- ret = false;
- break;
- }
-
- if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
- six_set_bitmask(lock, SIX_LOCK_NOSPIN);
- ret = false;
- break;
- }
-
- cpu_relax();
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
-{
- struct task_struct *task = current;
u64 end_time;
if (type == SIX_LOCK_write)
return false;
- preempt_disable();
- if (!six_can_spin_on_owner(lock))
- goto fail;
+ if (lock->wait_list.next != &wait->list)
+ return false;
- if (!osq_lock(&lock->osq))
- goto fail;
+ if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
+ return false;
+ preempt_disable();
end_time = sched_clock() + 10 * NSEC_PER_USEC;
- while (1) {
- struct task_struct *owner;
-
+ while (!need_resched() && six_owner_running(lock)) {
/*
- * If there's an owner, wait for it to either
- * release the lock or go to sleep.
+ * Ensures that writes to the waitlist entry happen after we see
+ * wait->lock_acquired: pairs with the smp_store_release in
+ * __six_lock_wakeup
*/
- owner = READ_ONCE(lock->owner);
- if (owner && !six_spin_on_owner(lock, owner, end_time))
- break;
-
- if (do_six_trylock(lock, type, false)) {
- osq_unlock(&lock->osq);
+ if (smp_load_acquire(&wait->lock_acquired)) {
preempt_enable();
return true;
}
- /*
- * When there's no owner, we might have preempted between the
- * owner acquiring the lock and setting the owner field. If
- * we're an RT task that will live-lock because we won't let
- * the owner complete.
- */
- if (!owner && (need_resched() || rt_task(task)))
+ if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
+ six_set_bitmask(lock, SIX_LOCK_NOSPIN);
break;
+ }
/*
* The cpu_relax() call is a compiler barrier which forces
@@ -426,24 +382,15 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type
cpu_relax();
}
- osq_unlock(&lock->osq);
-fail:
preempt_enable();
-
- /*
- * If we fell out of the spin path because of need_resched(),
- * reschedule now, before we try-lock again. This avoids getting
- * scheduled out right after we obtained the lock.
- */
- if (need_resched())
- schedule();
-
return false;
}
#else /* CONFIG_LOCK_SPIN_ON_OWNER */
-static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
+static inline bool six_optimistic_spin(struct six_lock *lock,
+ struct six_lock_waiter *wait,
+ enum six_lock_type type)
{
return false;
}
@@ -467,9 +414,6 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
trace_contention_begin(lock, 0);
lock_contended(&lock->dep_map, ip);
- if (six_optimistic_spin(lock, type))
- goto out;
-
wait->task = current;
wait->lock_want = type;
wait->lock_acquired = false;
@@ -507,6 +451,9 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
ret = 0;
}
+ if (six_optimistic_spin(lock, wait, type))
+ goto out;
+
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
diff --git a/libbcachefs/six.h b/libbcachefs/six.h
index 394da423..a7104ac1 100644
--- a/libbcachefs/six.h
+++ b/libbcachefs/six.h
@@ -124,7 +124,6 @@
*/
#include <linux/lockdep.h>
-#include <linux/osq_lock.h>
#include <linux/sched.h>
#include <linux/types.h>
@@ -140,7 +139,6 @@ struct six_lock {
unsigned intent_lock_recurse;
struct task_struct *owner;
unsigned __percpu *readers;
- struct optimistic_spin_queue osq;
raw_spinlock_t wait_lock;
struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 83bdb436..a93e53d0 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -1183,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_printf(out, "Created:");
prt_tab(out);
if (sb->time_base_lo)
- pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
+ bch2_prt_date_seconds(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
else
prt_printf(out, "(not set)");
prt_newline(out);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 1b5c2a1b..24672bb3 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1885,9 +1885,9 @@ found:
struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts)
{
- struct bch_sb_handle *sb = NULL;
+ DARRAY(struct bch_sb_handle) sbs = { 0 };
struct bch_fs *c = NULL;
- unsigned i, best_sb = 0;
+ struct bch_sb_handle *sb, *best = NULL;
struct printbuf errbuf = PRINTBUF;
int ret = 0;
@@ -1899,49 +1899,46 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
goto err;
}
- sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
- if (!sb) {
- ret = -ENOMEM;
+ ret = darray_make_room(&sbs, nr_devices);
+ if (ret)
goto err;
- }
- for (i = 0; i < nr_devices; i++) {
- ret = bch2_read_super(devices[i], &opts, &sb[i]);
+ for (unsigned i = 0; i < nr_devices; i++) {
+ struct bch_sb_handle sb = { NULL };
+
+ ret = bch2_read_super(devices[i], &opts, &sb);
if (ret)
goto err;
+ BUG_ON(darray_push(&sbs, sb));
}
- for (i = 1; i < nr_devices; i++)
- if (le64_to_cpu(sb[i].sb->seq) >
- le64_to_cpu(sb[best_sb].sb->seq))
- best_sb = i;
-
- i = 0;
- while (i < nr_devices) {
- if (i != best_sb &&
- !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) {
- pr_info("%pg has been removed, skipping", sb[i].bdev);
- bch2_free_super(&sb[i]);
- array_remove_item(sb, nr_devices, i);
+ darray_for_each(sbs, sb)
+ if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq))
+ best = sb;
+
+ darray_for_each_reverse(sbs, sb) {
+ if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) {
+ pr_info("%pg has been removed, skipping", sb->bdev);
+ bch2_free_super(sb);
+ darray_remove_item(&sbs, sb);
+ best -= best > sb;
continue;
}
- ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
+ ret = bch2_dev_in_fs(best->sb, sb->sb);
if (ret)
goto err_print;
- i++;
}
- c = bch2_fs_alloc(sb[best_sb].sb, opts);
- if (IS_ERR(c)) {
- ret = PTR_ERR(c);
+ c = bch2_fs_alloc(best->sb, opts);
+ ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
goto err;
- }
down_write(&c->state_lock);
- for (i = 0; i < nr_devices; i++) {
- ret = bch2_dev_attach_bdev(c, &sb[i]);
+ darray_for_each(sbs, sb) {
+ ret = bch2_dev_attach_bdev(c, sb);
if (ret) {
up_write(&c->state_lock);
goto err;
@@ -1960,7 +1957,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
goto err;
}
out:
- kfree(sb);
+ darray_for_each(sbs, sb)
+ bch2_free_super(sb);
+ darray_exit(&sbs);
printbuf_exit(&errbuf);
module_put(THIS_MODULE);
return c;
@@ -1970,9 +1969,6 @@ err_print:
err:
if (!IS_ERR_OR_NULL(c))
bch2_fs_stop(c);
- if (sb)
- for (i = 0; i < nr_devices; i++)
- bch2_free_super(&sb[i]);
c = ERR_PTR(ret);
goto out;
}
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index adeec805..7ba5df4e 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -467,6 +467,24 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
prt_printf(out, "%s", u->name);
}
+#ifndef __KERNEL__
+#include <time.h>
+void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
+{
+ time_t t = sec;
+ char buf[64];
+ ctime_r(&t, buf);
+ prt_str(out, buf);
+}
+#else
+void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
+{
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%ptT", &sec);
+ prt_u64(out, sec);
+}
+#endif
+
#define TABSTOP_SIZE 12
static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 67f1a1d2..0595605e 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -244,26 +244,7 @@ do { \
#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
void bch2_pr_time_units(struct printbuf *, u64);
-
-#ifdef __KERNEL__
-static inline void pr_time(struct printbuf *out, u64 time)
-{
- prt_printf(out, "%llu", time);
-}
-#else
-#include <time.h>
-static inline void pr_time(struct printbuf *out, u64 _time)
-{
- char time_str[64];
- time_t time = _time;
- struct tm *tm = localtime(&time);
- size_t err = strftime(time_str, sizeof(time_str), "%c", tm);
- if (!err)
- prt_printf(out, "(formatting error)");
- else
- prt_printf(out, "%s", time_str);
-}
-#endif
+void bch2_prt_date_seconds(struct printbuf *, time64_t);
#ifdef __KERNEL__
static inline void uuid_unparse_lower(u8 *uuid, char *out)
diff --git a/linux/closure.c b/linux/closure.c
index 1faa24d6..f86c9eea 100644
--- a/linux/closure.c
+++ b/linux/closure.c
@@ -17,9 +17,8 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
- if ((flags & CLOSURE_GUARD_MASK) ||
- (!r && (flags & ~CLOSURE_DESTRUCTOR)))
- panic("closure_put_after_sub: bogus flags %x remaining %i", flags, r);
+ BUG_ON(flags & CLOSURE_GUARD_MASK);
+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
if (!r) {
smp_acquire__after_ctrl_dep();