aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/backpointers.c8
-rw-r--r--fs/bcachefs/backpointers.h9
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/bcachefs_format.h11
-rw-r--r--fs/bcachefs/bkey.h6
-rw-r--r--fs/bcachefs/bkey_methods.c8
-rw-r--r--fs/bcachefs/btree_cache.c73
-rw-r--r--fs/bcachefs/btree_gc.c13
-rw-r--r--fs/bcachefs/btree_io.c19
-rw-r--r--fs/bcachefs/btree_iter.h9
-rw-r--r--fs/bcachefs/btree_node_scan.c19
-rw-r--r--fs/bcachefs/btree_trans_commit.c34
-rw-r--r--fs/bcachefs/btree_update_interior.c81
-rw-r--r--fs/bcachefs/btree_write_buffer.c14
-rw-r--r--fs/bcachefs/buckets.h8
-rw-r--r--fs/bcachefs/checksum.c23
-rw-r--r--fs/bcachefs/checksum.h5
-rw-r--r--fs/bcachefs/compress.h8
-rw-r--r--fs/bcachefs/ec.c54
-rw-r--r--fs/bcachefs/ec.h2
-rw-r--r--fs/bcachefs/extents.c11
-rw-r--r--fs/bcachefs/fs-io-direct.c19
-rw-r--r--fs/bcachefs/fs-io.c16
-rw-r--r--fs/bcachefs/journal_io.c17
-rw-r--r--fs/bcachefs/opts.c29
-rw-r--r--fs/bcachefs/opts.h10
-rw-r--r--fs/bcachefs/recovery_passes.c2
-rw-r--r--fs/bcachefs/sb-downgrade.c5
-rw-r--r--fs/bcachefs/sb-errors_types.h5
-rw-r--r--fs/bcachefs/sb-members.c53
-rw-r--r--fs/bcachefs/sb-members.h21
-rw-r--r--fs/bcachefs/super-io.c7
-rw-r--r--fs/bcachefs/super_types.h2
-rw-r--r--fs/bcachefs/sysfs.c11
34 files changed, 432 insertions, 182 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 114328acde720..fadb1078903d2 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -49,13 +49,15 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
if (!bch2_dev_exists2(c, bp.k->p.inode))
return 0;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
int ret = 0;
- bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+ bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+ !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
c, err,
- backpointer_pos_wrong,
- "backpointer at wrong pos");
+ backpointer_bucket_offset_wrong,
+ "backpointer bucket_offset wrong");
fsck_err:
return ret;
}
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index da012ca7daee5..85949b9fd880c 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
u64 bucket_offset)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
- struct bpos ret;
-
- ret = POS(bucket.inode,
- (bucket_to_sector(ca, bucket.offset) <<
- MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+ struct bpos ret = POS(bucket.inode,
+ (bucket_to_sector(ca, bucket.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
return ret;
}
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a31a5f706929e..91c3c1fef233d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -709,6 +709,8 @@ struct btree_trans_buf {
x(stripe_delete) \
x(reflink) \
x(fallocate) \
+ x(fsync) \
+ x(dio_write) \
x(discard) \
x(discard_fast) \
x(invalidate) \
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 364ae42022af1..085987435a5ea 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -578,7 +578,8 @@ struct bch_member {
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
- __le32 pad;
+ __u8 btree_bitmap_shift;
+ __u8 pad[3];
__le64 last_mount; /* time_t */
__le64 flags;
@@ -587,6 +588,7 @@ struct bch_member {
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
+ __le64 btree_allocated_bitmap;
};
#define BCH_MEMBER_V1_BYTES 56
@@ -876,7 +878,8 @@ struct bch_sb_field_downgrade {
x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
- x(btree_subvolume_children, BCH_VERSION(1, 6))
+ x(btree_subvolume_children, BCH_VERSION(1, 6)) \
+ x(mi_btree_bitmap, BCH_VERSION(1, 7))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1314,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(write_buffer_keys, 11) \
x(datetime, 12)
-enum {
+enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
BCH_JSET_ENTRY_TYPES()
#undef x
@@ -1360,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
x(inodes, 1) \
x(key_version, 2)
-enum {
+enum bch_fs_usage_type {
#define x(f, nr) BCH_FS_USAGE_##f = nr,
BCH_FS_USAGE_TYPES()
#undef x
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index cf23ff47bed8b..3a45d128f608d 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
}
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
const struct bkey_packed *k)
{
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 5e52684764eb1..db336a43fc083 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+ bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+ (flags & BKEY_INVALID_COMMIT)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
- bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+ bch2_btree_node_type_str(type),
+ k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_types[k.k->type]
+ : "(unknown)");
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0, c, err,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 84474324dba9b..02c70e813face 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- u32 seq;
- BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ if (unlikely(level >= BTREE_MAX_DEPTH)) {
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+ level, BTREE_MAX_DEPTH);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
}
set_btree_node_read_in_flight(b);
-
six_unlock_write(&b->c.lock);
- seq = six_lock_seq(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- /* Unlock before doing IO: */
- if (path && sync)
- bch2_trans_unlock_noassert(trans);
-
- bch2_btree_node_read(trans, b, sync);
+ if (path) {
+ u32 seq = six_lock_seq(&b->c.lock);
- if (!sync)
- return NULL;
+ /* Unlock before doing IO: */
+ six_unlock_intent(&b->c.lock);
+ bch2_trans_unlock_noassert(trans);
- if (path) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
+ bch2_btree_node_read(trans, b, sync);
- if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- BUG_ON(!path);
+ if (!sync)
+ return NULL;
- trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ b = NULL;
+ } else {
+ bch2_btree_node_read(trans, b, sync);
+ if (lock_type == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
}
return b;
@@ -1112,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_cache_find(bc, k);
+ struct btree *b = btree_cache_find(bc, k);
if (b)
return 0;
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- return PTR_ERR_OR_ZERO(b);
+ if (!IS_ERR_OR_NULL(b))
+ six_unlock_read(&b->c.lock);
+ return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1148,6 +1163,8 @@ wait_on_io:
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+ goto out;
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1162,7 +1179,7 @@ wait_on_io:
btree_node_data_free(c, b);
bch2_btree_node_hash_remove(bc, b);
mutex_unlock(&bc->lock);
-
+out:
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index d2555da55c6da..ecbd9598f69fd 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -828,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
struct bch_fs *c = trans->c;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+ struct printbuf buf = PRINTBUF;
int ret = 0;
deleted.p = k->k->p;
@@ -848,11 +849,23 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (ret)
goto err;
+ if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+ c, btree_bitmap_not_marked,
+ "btree ptr not marked in member info btree allocated bitmap\n %s",
+ (bch2_bkey_val_to_text(&buf, c, *k),
+ buf.buf))) {
+ mutex_lock(&c->sb_lock);
+ bch2_dev_btree_bitmap_mark(c, *k);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
ret = commit_do(trans, NULL, NULL, 0,
bch2_key_trigger(trans, btree_id, level, old,
unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
fsck_err:
err:
+ printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index d7de82ac38935..9678b2375bedd 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -831,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
}
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
struct bset *i, struct bkey_packed *k)
{
if (bkey_p_next(k) > vstruct_last(i))
@@ -840,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
if (k->format > KEY_FORMAT_CURRENT)
return false;
- if (k->u64s < bkeyp_key_u64s(&b->format, k))
+ if (!bkeyp_u64s_valid(&b->format, k))
return false;
struct printbuf buf = PRINTBUF;
@@ -884,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
"invalid bkey format %u", k->format))
goto drop_this_key;
- if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+ if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_u64s,
- "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+ "bad k->u64s %u (min %u max %lu)", k->u64s,
+ bkeyp_key_u64s(&b->format, k),
+ U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
goto drop_this_key;
if (!write)
@@ -947,13 +949,12 @@ drop_this_key:
* do
*/
- if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
for (next_good_key = 1;
next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
next_good_key++)
- if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
goto got_good_key;
-
}
/*
@@ -1339,7 +1340,9 @@ start:
rb->start_time);
bio_put(&rb->bio);
- if (saw_error && !btree_node_read_error(b)) {
+ if (saw_error &&
+ !btree_node_read_error(b) &&
+ c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 1d58d447b386c..1c70836dd7cce 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
- if (!trans->restarted)
- btree_iter_path(trans, iter)->preserve = false;
+ if (!iter->path || trans->restarted)
+ return;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ path->preserve = false;
+ if (path->ref == 1)
+ path->should_be_locked = false;
}
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 556f76f5c84e1..866bd278439f8 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -133,9 +133,19 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
if (le64_to_cpu(bn->magic) != bset_magic(c))
return;
+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+ struct nonce nonce = btree_nonce(&bn->keys, 0);
+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+ bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+ }
+
if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
return;
+ if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+ return;
+
rcu_read_lock();
struct found_btree_node n = {
.btree_id = BTREE_NODE_ID(bn),
@@ -195,8 +205,13 @@ static int read_btree_nodes_worker(void *p)
last_print = jiffies;
}
- try_read_btree_node(w->f, ca, bio, buf,
- bucket * ca->mi.bucket_size + bucket_offset);
+ u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+ !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+ continue;
+
+ try_read_btree_node(w->f, ca, bio, buf, sector);
}
err:
bio_put(bio);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index aa9da49707404..bbec91e8e6506 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
struct bkey_cached *ck = (void *) path->l[0].b;
unsigned new_u64s;
struct bkey_i *new_k;
+ unsigned watermark = flags & BCH_WATERMARK_MASK;
EBUG_ON(path->level);
- if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
- bch2_btree_key_cache_must_wait(c) &&
- !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+ if (watermark < BCH_WATERMARK_reclaim &&
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bch2_btree_key_cache_must_wait(c))
return -BCH_ERR_btree_insert_need_journal_reclaim;
/*
@@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
}
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
- struct btree_insert_entry *btree_id_start)
+ unsigned btree_id_start)
{
- struct btree_insert_entry *i;
bool trans_trigger_run;
int ret, overwrite;
@@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
do {
trans_trigger_run = false;
- for (i = btree_id_start;
- i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+ for (unsigned i = btree_id_start;
+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
- if (i->btree_id != btree_id)
+ if (trans->updates[i].btree_id != btree_id)
continue;
- ret = run_one_trans_trigger(trans, i, overwrite);
+ ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
if (ret < 0)
return ret;
if (ret)
@@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
- struct btree_insert_entry *btree_id_start = trans->updates;
- unsigned btree_id = 0;
+ unsigned btree_id = 0, btree_id_start = 0;
int ret = 0;
/*
@@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
if (btree_id == BTREE_ID_alloc)
continue;
- while (btree_id_start < trans->updates + trans->nr_updates &&
- btree_id_start->btree_id < btree_id)
+ while (btree_id_start < trans->nr_updates &&
+ trans->updates[btree_id_start].btree_id < btree_id)
btree_id_start++;
ret = run_btree_triggers(trans, btree_id, btree_id_start);
@@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
return ret;
}
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
+
if (i->btree_id > BTREE_ID_alloc)
break;
if (i->btree_id == BTREE_ID_alloc) {
- ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+ ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
if (ret)
return ret;
break;
@@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
struct bch_fs *c = trans->c;
int ret = 0, u64s_delta = 0;
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
if (i->cached)
continue;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index c4a5e83a56a43..6030c396754f6 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -21,6 +21,7 @@
#include "keylist.h"
#include "recovery_passes.h"
#include "replicas.h"
+#include "sb-members.h"
#include "super-io.h"
#include "trace.h"
@@ -605,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as,
bch2_keylist_push(keys);
}
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+ for_each_keylist_key(&as->new_keys, k)
+ if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+ return false;
+ return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->sb_lock);
+ for_each_keylist_key(&as->new_keys, k)
+ bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
/*
* The transactional part of an interior btree node update, where we journal the
* update we did to the interior node and update alloc info:
@@ -662,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as)
if (ret)
goto err;
+ if (!btree_update_new_nodes_marked_sb(as))
+ btree_update_new_nodes_mark_sb(as);
+
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
@@ -1280,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
bch2_recalc_btree_reserve(c);
}
-static void bch2_btree_set_root(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ bool nofail)
{
struct bch_fs *c = as->c;
- struct btree *old;
trace_and_count(c, btree_node_set_root, trans, b);
- old = btree_node_root(c, b);
+ struct btree *old = btree_node_root(c, b);
/*
* Ensure no one is using the old root while we switch to the
* new root:
*/
- bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ if (nofail) {
+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ } else {
+ int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+ if (ret)
+ return ret;
+ }
bch2_btree_set_root_inmem(c, b);
@@ -1310,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as,
* depend on the new root would have to update the new root.
*/
bch2_btree_node_unlock_write(trans, path, old);
+ return 0;
}
/* Interior node updates: */
@@ -1652,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (parent) {
/* Split a non root node */
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else if (n3) {
- bch2_btree_set_root(as, trans, trans->paths + path, n3);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
} else {
/* Root filled up but didn't need to be split */
- bch2_btree_set_root(as, trans, trans->paths + path, n1);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
}
+ if (ret)
+ goto err;
+
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
@@ -1863,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
bch2_keylist_add(&as->parent_keys, &b->key);
btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
- bch2_btree_set_root(as, trans, path, n);
+ int ret = bch2_btree_set_root(as, trans, path, n, true);
+ BUG_ON(ret);
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
@@ -1916,6 +1950,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
BUG_ON(!trans->paths[path].should_be_locked);
BUG_ON(!btree_node_locked(&trans->paths[path], level));
+ /*
+ * Work around a deadlock caused by the btree write buffer not doing
+ * merges and leaving tons of merges for us to do - we really don't need
+ * to be doing merges at all from the interior update path, and if the
+ * interior update path is generating too many new interior updates we
+ * deadlock:
+ */
+ if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+ return 0;
+
+ flags &= ~BCH_WATERMARK_MASK;
+
b = trans->paths[path].l[level].b;
if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
@@ -2061,6 +2107,10 @@ err:
bch2_path_put(trans, new_path, true);
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
+ if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+ ret = 0;
+ if (!ret)
+ ret = bch2_trans_relock(trans);
return ret;
err_free_update:
bch2_btree_node_free_never_used(as, trans, n);
@@ -2106,12 +2156,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else {
- bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+ ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
}
+ if (ret)
+ goto err;
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index baf63e2fddb64..36a6f42aba5e6 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -316,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
write_locked = false;
+
+ ret = lockrestart_do(trans,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_foreground_maybe_merge(trans, iter.path, 0,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc));
+ if (ret)
+ goto err;
}
}
@@ -382,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
ret = commit_do(trans, NULL, NULL,
BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_no_journal_res|
- BCH_TRANS_COMMIT_journal_reclaim,
+ BCH_TRANS_COMMIT_no_journal_res ,
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 00aaf4bb51397..f9af5adabe836 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -395,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type)
: "(invalid data type)";
}
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
- if (type < BCH_DATA_NR)
- prt_str(out, __bch2_data_types[type]);
- else
- prt_printf(out, "(invalid data type %u)", type);
-}
-
/* disk reservations: */
static inline void bch2_disk_reservation_put(struct bch_fs *c,
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 4701457f6381c..7ed779b411f61 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
extent_nonce(version, crc_old), bio);
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
- bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
- "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
- __func__,
- crc_old.csum.hi,
- crc_old.csum.lo,
- merged.hi,
- merged.lo,
- bch2_csum_types[crc_old.csum_type],
- bch2_csum_types[new_csum_type]);
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+ "expected %0llx:%0llx got %0llx:%0llx (old type ",
+ __func__,
+ crc_old.csum.hi,
+ crc_old.csum.lo,
+ merged.hi,
+ merged.lo);
+ bch2_prt_csum_type(&buf, crc_old.csum_type);
+ prt_str(&buf, " new type ");
+ bch2_prt_csum_type(&buf, new_csum_type);
+ prt_str(&buf, ")");
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
}
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 1b8c2c1016dc6..e40499fde9a40 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
struct bch_csum expected,
struct bch_csum got)
{
- prt_printf(out, "checksum error: got ");
+ prt_str(out, "checksum error, type ");
+ bch2_prt_csum_type(out, type);
+ prt_str(out, ": got ");
bch2_csum_to_text(out, type, got);
prt_str(out, " should be ");
bch2_csum_to_text(out, type, expected);
- prt_printf(out, " type %s", bch2_csum_types[type]);
}
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index 58c2eb45570ff..607fd5e232c90 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
- if (type < BCH_COMPRESSION_TYPE_NR)
- prt_str(out, __bch2_compression_types[type]);
- else
- prt_printf(out, "(invalid compression type %u)", type);
-}
-
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 082075244e16a..556a217108d32 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -131,29 +131,33 @@ fsck_err:
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
- const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+ const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+ struct bch_stripe s = {};
+
+ memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+ unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+ s.algorithm,
+ le16_to_cpu(s.sectors),
+ nr_data,
+ s.nr_redundant);
+ bch2_prt_csum_type(out, s.csum_type);
+ prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+ for (unsigned i = 0; i < s.nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+ if ((void *) ptr >= bkey_val_end(k))
+ break;
+
+ bch2_extent_ptr_to_text(out, c, ptr);
- prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
- s->algorithm,
- le16_to_cpu(s->sectors),
- nr_data,
- s->nr_redundant,
- s->csum_type,
- 1U << s->csum_granularity_bits);
-
- for (i = 0; i < s->nr_blocks; i++) {
- const struct bch_extent_ptr *ptr = s->ptrs + i;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
- if (i < nr_data)
- prt_printf(out, "#%u", stripe_blockcount_get(s, i));
- prt_printf(out, " gen %u", ptr->gen);
- if (ptr_stale(ca, ptr))
- prt_printf(out, " stale");
+ if (s.csum_type < BCH_CSUM_NR &&
+ i < nr_data &&
+ stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+ prt_printf(out, "#%u", stripe_blockcount_get(sp, i));
}
}
@@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
struct printbuf err = PRINTBUF;
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
- prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
- want.hi, want.lo,
- got.hi, got.lo,
- bch2_csum_types[v->csum_type]);
+ prt_str(&err, "stripe ");
+ bch2_csum_err_msg(&err, v->csum_type, want, got);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index f4369b02e805f..f042616888b0a 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
+ EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 0e3ca99fbd2de..1a331e5392048 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -998,7 +998,9 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
prt_str(out, " cached");
if (ptr->unwritten)
prt_str(out, " unwritten");
- if (ca && ptr_stale(ca, ptr))
+ if (b >= ca->mi.first_bucket &&
+ b < ca->mi.nbuckets &&
+ ptr_stale(ca, ptr))
prt_printf(out, " stale");
}
}
@@ -1028,11 +1030,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc.compressed_size,
crc.uncompressed_size,
- crc.offset, crc.nonce,
- bch2_csum_types[crc.csum_type]);
+ crc.offset, crc.nonce);
+ bch2_prt_csum_type(out, crc.csum_type);
+ prt_str(out, " compress ");
bch2_prt_compression_type(out, crc.compression_type);
break;
}
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index f49e6c0f0f683..b889370a50881 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
+ bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ return -EROFS;
+
inode_lock(&inode->v);
ret = generic_write_checks(req, iter);
if (unlikely(ret <= 0))
- goto err;
+ goto err_put_write_ref;
ret = file_remove_privs(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
ret = file_update_time(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
- goto err;
+ goto err_put_write_ref;
inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode);
@@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
}
ret = bch2_dio_write_loop(dio);
-err:
+out:
if (locked)
inode_unlock(&inode->v);
return ret;
@@ -653,7 +658,9 @@ err_put_bio:
bch2_pagecache_block_put(inode);
bio_put(bio);
inode_dio_end(&inode->v);
- goto err;
+err_put_write_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ goto out;
}
void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 8c70123b6a0c8..20b40477425f4 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
static int bch2_flush_inode(struct bch_fs *c,
struct bch_inode_info *inode)
{
- struct bch_inode_unpacked u;
- int ret;
-
if (c->opts.journal_flush_disabled)
return 0;
- ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
- if (ret)
- return ret;
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ return -EROFS; /* could not take the fsync write ref */
- return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
- bch2_inode_flush_nocow_writes(c, inode);
+ struct bch_inode_unpacked u;
+ int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+ bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+ bch2_inode_flush_nocow_writes(c, inode); /* ?: chain: the first nonzero result ends the chain and becomes ret */
+ bch2_write_ref_put(c, BCH_WRITE_REF_fsync); /* pairs with the tryget above; reached on success and on error */
+ return ret;
}
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 725fcf46f6312..9aa28b52ab926 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out,
if (entry) {
prt_str(out, " type=");
- prt_str(out, bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
}
if (!jset) {
@@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
jset_entry_for_each_key(entry, k) {
if (!first) {
prt_newline(out);
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
+ prt_str(out, ": ");
}
prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
@@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
- prt_printf(out, "type=%s v=%llu",
- bch2_fs_usage_types[u->entry.btree_id],
- le64_to_cpu(u->v));
+ prt_str(out, "type=");
+ bch2_prt_fs_usage_type(out, u->entry.btree_id);
+ prt_printf(out, " v=%llu", le64_to_cpu(u->v));
}
static int journal_entry_data_usage_validate(struct bch_fs *c,
@@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
+ bch2_prt_jset_entry_type(out, entry->type);
+ /* Type name or "(unknown jset_entry_type N)" is printed above; the ops table is only indexed for types below BCH_JSET_ENTRY_NR */
if (entry->type < BCH_JSET_ENTRY_NR) {
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ prt_str(out, ": ");
bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
- } else {
- prt_printf(out, "(unknown type %u)", entry->type);
}
}
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index e1800c4119b5f..bb068fd724656 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -43,7 +43,7 @@ const char * const __bch2_btree_ids[] = {
NULL
};
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
@@ -53,7 +53,7 @@ const char * const bch2_csum_opts[] = {
NULL
};
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
BCH_COMPRESSION_TYPES()
NULL
};
@@ -83,18 +83,39 @@ const char * const bch2_member_states[] = {
NULL
};
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
BCH_JSET_ENTRY_TYPES()
NULL
};
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
BCH_FS_USAGE_TYPES()
NULL
};
#undef x
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+				    unsigned nr, const char *type, unsigned idx)
+{
+	if (idx >= nr)	/* outside the name table: print the raw index, labelled with @type */
+		prt_printf(out, "(unknown %s %u)", type, idx);
+	else
+		prt_str(out, opts[idx]);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \
+void bch2_prt_##name(struct printbuf *out, type t) \
+{ \
+ prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+/* Each use defines bch2_prt_<name>(); the "- 1" above keeps each table's trailing NULL out of the printable range */
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
+
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 1ac4135cca1c3..84e452835a17d 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
extern const char * const bch2_d_types[];
+void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *, enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
+
static inline const char *bch2_d_type_str(unsigned d_type)
{
return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index cb501460d6152..0cec0f7d97035 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -44,7 +44,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
set_bit(BCH_FS_may_go_rw, &c->flags);
- if (keys->nr || c->opts.fsck || !c->sb.clean)
+ if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
return bch2_fs_read_write_early(c);
return 0;
}
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index d6f81179c3a29..a98ef940b7a32 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -51,7 +51,10 @@
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
x(btree_subvolume_children, \
BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
- BCH_FSCK_ERR_subvol_children_not_set)
+ BCH_FSCK_ERR_subvol_children_not_set) \
+ x(mi_btree_bitmap, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_btree_bitmap_not_marked)
#define DOWNGRADE_TABLE()
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index d7d609131030a..4ca6e7b0d8aae 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -130,7 +130,7 @@
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
- x(backpointer_pos_wrong, 125) \
+ x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
@@ -270,7 +270,8 @@
x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
- x(dup_backpointer_to_bad_csum_extent, 265)
+ x(dup_backpointer_to_bad_csum_extent, 265) \
+ x(btree_bitmap_not_marked, 266)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index eff5ce18c69c0..522a969345e52 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "disk_groups.h"
#include "opts.h"
#include "replicas.h"
@@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) /* test each pointer of k against the bitmap of the device it names */
+ if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+ ptr->offset, btree_sectors(c)))
+ return false; /* btree_sectors(c) sectors at this pointer's offset are not all marked */
+ return true; /* no pointer failed the check */
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+				u64 start, unsigned sectors)
+{
+	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+	/* Mark sectors [start, end) of member @dev; bit N covers [N << shift, (N + 1) << shift) */
+	u64 end = start + sectors;
+	/* The 64 (2^6) bits must reach @end: if they do not, coarsen the granularity by @resize */
+	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+	if (resize > 0) {
+		u64 new_bitmap = 0;
+
+		for (u64 i = 0; i < 64; i++)	/* u64 keeps i >> resize defined when resize >= 32 */
+			if (bitmap & BIT_ULL(i))
+				new_bitmap |= BIT_ULL(i >> resize);
+		bitmap = new_bitmap;
+		m->btree_bitmap_shift += resize;
+	}
+	/* First bit is the one containing @start (not @sectors); widen before shifting so it cannot wrap */
+	for (unsigned bit = start >> m->btree_bitmap_shift;
+	     (u64) bit << m->btree_bitmap_shift < end;
+	     bit++)
+		bitmap |= BIT_ULL(bit);
+
+	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+ lockdep_assert_held(&c->sb_lock);
+ /* Mark btree_sectors(c) sectors at each pointer's offset in the members_v2 field; NOTE(review): no superblock write here, presumably the caller's job - confirm */
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index be0a941832715..b27c3e4467cf2 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_SB_MEMBERS_H
#include "darray.h"
+#include "bkey_types.h"
extern char * const bch2_member_error_strs[];
@@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
+ .btree_bitmap_shift = mi->btree_bitmap_shift,
+ .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
};
}
@@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+	u64 end = start + sectors;
+	/* The bitmap covers 64 << shift sectors; 64ULL keeps that shift out of (overflowing) int arithmetic */
+	if (end > 64ULL << ca->mi.btree_bitmap_shift)
+		return false;
+	/* Every bit overlapping [start, end) must be set: begin at the bit containing @start, not @sectors */
+	for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
+	     (u64) bit << ca->mi.btree_bitmap_shift < end;
+	     bit++)
+		if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+			return false;
+	return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 5eee055ee2721..08ea3dbbbe97c 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -700,8 +700,11 @@ retry:
return -ENOMEM;
sb->sb_name = kstrdup(path, GFP_KERNEL);
- if (!sb->sb_name)
- return -ENOMEM;
+ if (!sb->sb_name) {
+ ret = -ENOMEM;
+ prt_printf(&err, "error allocating memory for sb_name");
+ goto err;
+ }
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index ec784d975f665..11bcef170c2c2 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -37,6 +37,8 @@ struct bch_member_cpu {
u8 durability;
u8 freespace_initialized;
u8 valid;
+ u8 btree_bitmap_shift;
+ u64 btree_allocated_bitmap;
};
#endif /* _BCACHEFS_SUPER_TYPES_H */
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index b18b0cc81b594..5be92fe3f4ea4 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -25,6 +25,7 @@
#include "ec.h"
#include "inode.h"
#include "journal.h"
+#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "movinggc.h"
@@ -138,6 +139,7 @@ do { \
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
write_attribute(prune_cache);
write_attribute(btree_wakeup);
rw_attribute(btree_gc_periodic);
@@ -500,7 +502,7 @@ STORE(bch2_fs)
/* Debugging: */
- if (!test_bit(BCH_FS_rw, &c->flags))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_prune_cache) {
@@ -533,6 +535,11 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_invalidates)
bch2_do_invalidates(c);
+ if (attr == &sysfs_trigger_journal_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -553,6 +560,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@@ -651,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_gc,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
+ &sysfs_trigger_journal_flush,
&sysfs_prune_cache,
&sysfs_btree_wakeup,