aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-07-10 14:17:18 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-07-10 14:27:14 -0400
commit21ae8a4b715acd326e6404ce6409ae329566eb64 (patch)
treec41113a1d64a16ed2872b55b17e73b4a0d4c765b
parent73bf371f4c2b7b5323cef9b6813fc813ac9d385b (diff)
downloadbcachefs-tools-21ae8a4b715acd326e6404ce6409ae329566eb64.tar.gz
Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--include/linux/rhashtable.h1
-rw-r--r--libbcachefs/bcachefs.h17
-rw-r--r--libbcachefs/bcachefs_format.h76
-rw-r--r--libbcachefs/btree_iter.h8
-rw-r--r--libbcachefs/recovery.c42
-rw-r--r--libbcachefs/recovery.h2
-rw-r--r--libbcachefs/subvolume.c237
-rw-r--r--libbcachefs/subvolume.h33
-rw-r--r--libbcachefs/subvolume_types.h2
-rw-r--r--libbcachefs/super-io.c32
-rw-r--r--libbcachefs/super-io.h4
12 files changed, 354 insertions, 102 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 381d6456..87a4c7de 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-fb39031ade476044b4d89e6a8f20de8e025be39c
+070ec8d07bcab34fde39499a79b9da6f4254ec7c
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c5e717bf..adeef329 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -28,6 +28,7 @@
#include <linux/bit_spinlock.h>
#define BIT(nr) (1UL << (nr))
+#define BIT_ULL(nr) (1ULL << (nr))
#include <linux/rhashtable-types.h>
/*
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 88a1782b..d8c02064 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -660,12 +660,11 @@ enum bch_write_ref {
#define PASS_FSCK BIT(1)
#define PASS_UNCLEAN BIT(2)
#define PASS_ALWAYS BIT(3)
-#define PASS_UPGRADE(v) ((v) << 4)
#define BCH_RECOVERY_PASSES() \
x(alloc_read, PASS_ALWAYS) \
x(stripes_read, PASS_ALWAYS) \
- x(initialize_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \
+ x(initialize_subvolumes, 0) \
x(snapshots_read, PASS_ALWAYS) \
x(check_allocations, PASS_FSCK) \
x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
@@ -677,8 +676,8 @@ enum bch_write_ref {
x(check_extents_to_backpointers,PASS_FSCK) \
x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
- x(bucket_gens_init, PASS_UPGRADE(bcachefs_metadata_version_bucket_gens)) \
- x(fs_upgrade_for_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \
+ x(bucket_gens_init, 0) \
+ x(fs_upgrade_for_subvolumes, 0) \
x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \
@@ -690,7 +689,7 @@ enum bch_write_ref {
x(check_root, PASS_FSCK) \
x(check_directory_structure, PASS_FSCK) \
x(check_nlinks, PASS_FSCK) \
- x(fix_reflink_p, PASS_UPGRADE(bcachefs_metadata_version_reflink_p_fix)) \
+ x(fix_reflink_p, 0) \
enum bch_recovery_pass {
#define x(n, when) BCH_RECOVERY_PASS_##n,
@@ -1033,6 +1032,8 @@ struct bch_fs {
u64 journal_replay_seq_start;
u64 journal_replay_seq_end;
enum bch_recovery_pass curr_recovery_pass;
+ /* bitmap of explicitly enabled recovery passes: */
+ u64 recovery_passes_explicit;
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
@@ -1177,12 +1178,6 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
return dev < c->sb.nr_devices && c->devs[dev];
}
-static inline bool bch2_version_upgrading_to(const struct bch_fs *c, unsigned new_version)
-{
- return c->sb.version_upgrade_complete < new_version &&
- c->sb.version >= new_version;
-}
-
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 78771d8d..6d693e4d 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1148,6 +1148,8 @@ struct bch_snapshot {
__le32 children[2];
__le32 subvol;
__le32 tree;
+ __le32 depth;
+ __le32 skip[3];
};
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
@@ -1578,32 +1580,60 @@ struct bch_sb_field_journal_seq_blacklist {
#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
-#define BCH_METADATA_VERSIONS() \
- x(bkey_renumber, BCH_VERSION(0, 10)) \
- x(inode_btree_change, BCH_VERSION(0, 11)) \
- x(snapshot, BCH_VERSION(0, 12)) \
- x(inode_backpointers, BCH_VERSION(0, 13)) \
- x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \
- x(snapshot_2, BCH_VERSION(0, 15)) \
- x(reflink_p_fix, BCH_VERSION(0, 16)) \
- x(subvol_dirent, BCH_VERSION(0, 17)) \
- x(inode_v2, BCH_VERSION(0, 18)) \
- x(freespace, BCH_VERSION(0, 19)) \
- x(alloc_v4, BCH_VERSION(0, 20)) \
- x(new_data_types, BCH_VERSION(0, 21)) \
- x(backpointers, BCH_VERSION(0, 22)) \
- x(inode_v3, BCH_VERSION(0, 23)) \
- x(unwritten_extents, BCH_VERSION(0, 24)) \
- x(bucket_gens, BCH_VERSION(0, 25)) \
- x(lru_v2, BCH_VERSION(0, 26)) \
- x(fragmentation_lru, BCH_VERSION(0, 27)) \
- x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \
- x(snapshot_trees, BCH_VERSION(0, 29)) \
- x(major_minor, BCH_VERSION(1, 0))
+#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
+
+#define BCH_METADATA_VERSIONS() \
+ x(bkey_renumber, BCH_VERSION(0, 10), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_btree_change, BCH_VERSION(0, 11), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot, BCH_VERSION(0, 12), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_backpointers, BCH_VERSION(0, 13), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_2, BCH_VERSION(0, 15), \
+ BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \
+ BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(reflink_p_fix, BCH_VERSION(0, 16), \
+ BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \
+ x(subvol_dirent, BCH_VERSION(0, 17), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_v2, BCH_VERSION(0, 18), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(freespace, BCH_VERSION(0, 19), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(alloc_v4, BCH_VERSION(0, 20), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(new_data_types, BCH_VERSION(0, 21), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(backpointers, BCH_VERSION(0, 22), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_v3, BCH_VERSION(0, 23), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(unwritten_extents, BCH_VERSION(0, 24), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(bucket_gens, BCH_VERSION(0, 25), \
+ BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(lru_v2, BCH_VERSION(0, 26), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(fragmentation_lru, BCH_VERSION(0, 27), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_trees, BCH_VERSION(0, 29), \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(major_minor, BCH_VERSION(1, 0), \
+ 0) \
+ x(snapshot_skiplists, BCH_VERSION(1, 1), \
+ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
-#define x(t, n) bcachefs_metadata_version_##t = n,
+#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n,
BCH_METADATA_VERSIONS()
#undef x
bcachefs_metadata_version_max
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 0e9c1cb9..c472aa8c 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -795,6 +795,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
+#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
+ _start, _iter_flags, _k, \
+ _disk_res, _journal_seq, _commit_flags,\
+ _do) \
+ for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_commit_flags)))
+
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
_start, _end, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 47adb667..b04e1565 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -594,10 +594,21 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
unsigned iter_flags =
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS;
+ unsigned update_flags = BTREE_TRIGGER_NORUN;
int ret;
+ /*
+ * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
+ * keep the key cache coherent with the underlying btree. Nothing
+ * besides the allocator is doing updates yet so we don't need key cache
+ * coherency for non-alloc btrees, and key cache fills for snapshots
+ * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until
+ * the snapshots recovery pass runs.
+ */
if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED;
+ else
+ update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
@@ -610,7 +621,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten)
goto out;
- ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
+ ret = bch2_trans_update(trans, &iter, k->k, update_flags);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -1115,6 +1126,7 @@ static void check_version_upgrade(struct bch_fs *c)
unsigned latest_version = bcachefs_metadata_version_current;
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
+ u64 recovery_passes;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@@ -1161,12 +1173,15 @@ static void check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version);
prt_newline(&buf);
- prt_str(&buf, "fsck required");
+ recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
+ if (recovery_passes) {
+ prt_str(&buf, "fsck required");
- bch_info(c, "%s", buf.buf);
+ c->recovery_passes_explicit |= recovery_passes;
+ c->opts.fix_errors = FSCK_OPT_YES;
+ }
- c->opts.fsck = true;
- c->opts.fix_errors = FSCK_OPT_YES;
+ bch_info(c, "%s", buf.buf);
mutex_lock(&c->sb_lock);
bch2_sb_upgrade(c, new_version);
@@ -1199,21 +1214,30 @@ static struct recovery_pass_fn recovery_passes[] = {
#undef x
};
+u64 bch2_fsck_recovery_passes(void)
+{
+ u64 ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_passes); i++)
+ if (recovery_passes[i].when & PASS_FSCK)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false;
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return true;
if ((p->when & PASS_FSCK) && c->opts.fsck)
return true;
if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
return true;
if (p->when & PASS_ALWAYS)
return true;
- if (p->when >= PASS_UPGRADE(0) &&
- bch2_version_upgrading_to(c, p->when >> 4))
- return true;
return false;
}
@@ -1297,7 +1321,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (!c->opts.nochanges)
+ if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
check_version_upgrade(c);
if (c->opts.fsck && c->opts.norecovery) {
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
index 8c0348e8..f8e796c0 100644
--- a/libbcachefs/recovery.h
+++ b/libbcachefs/recovery.h
@@ -52,6 +52,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct bch_fs *);
+u64 bch2_fsck_recovery_passes(void);
+
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index f3852c43..f118e585 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -8,8 +8,41 @@
#include "fs.h"
#include "subvolume.h"
+#include <linux/random.h>
+
static int bch2_subvolume_delete(struct btree_trans *, u32);
+static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ struct snapshot_t *s = snapshot_t(c, id);
+
+ if (s->skip[2] <= ancestor)
+ return s->skip[2];
+ if (s->skip[1] <= ancestor)
+ return s->skip[1];
+ if (s->skip[0] <= ancestor)
+ return s->skip[0];
+ return s->parent;
+}
+
+bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
+
+ while (id && id < ancestor)
+ id = get_ancestor_below(c, id, ancestor);
+
+ return id == ancestor;
+}
+
+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ while (id && id < ancestor)
+ id = snapshot_t(c, id)->parent;
+
+ return id == ancestor;
+}
+
/* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@@ -140,6 +173,25 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
}
+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
+ if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
+ le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
+ prt_printf(err, "skiplist not normalized");
+ return -BCH_ERR_invalid_bkey;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
+ id = le32_to_cpu(s.v->skip[i]);
+
+ if (!id != !s.v->parent ||
+ (s.v->parent &&
+ id <= k.k->p.offset)) {
+ prt_printf(err, "bad skiplist node %u)", id);
+ return -BCH_ERR_invalid_bkey;
+ }
+ }
+ }
+
return 0;
}
@@ -161,10 +213,17 @@ int bch2_mark_snapshot(struct btree_trans *trans,
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
t->parent = le32_to_cpu(s.v->parent);
+ t->skip[0] = le32_to_cpu(s.v->skip[0]);
+ t->skip[1] = le32_to_cpu(s.v->skip[1]);
+ t->skip[2] = le32_to_cpu(s.v->skip[2]);
+ t->depth = le32_to_cpu(s.v->depth);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
t->tree = le32_to_cpu(s.v->tree);
+
+ if (BCH_SNAPSHOT_DELETED(s.v))
+ set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
} else {
t->parent = 0;
t->children[0] = 0;
@@ -370,9 +429,9 @@ static int check_snapshot_tree(struct btree_trans *trans,
"snapshot tree points to missing subvolume:\n %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
- fsck_err_on(!bch2_snapshot_is_ancestor(c,
- le32_to_cpu(subvol.snapshot),
- root_id), c,
+ fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
+ le32_to_cpu(subvol.snapshot),
+ root_id), c,
"snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
@@ -441,7 +500,48 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
if (ret)
return ret;
- return bch2_snapshot_is_ancestor(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
+ return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
+}
+
+static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
+{
+ struct snapshot_t *s;
+
+ if (!id)
+ return 0;
+
+ s = snapshot_t(c, id);
+ if (!s->parent)
+ return id;
+
+ return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
+}
+
+static int snapshot_rand_ancestor_good(struct btree_trans *trans,
+ struct bch_snapshot s)
+{
+ struct bch_snapshot a;
+ unsigned i;
+ int ret;
+
+ for (i = 0; i < 3; i++) {
+ if (!s.parent != !s.skip[i])
+ return false;
+
+ if (!s.parent)
+ continue;
+
+ ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
+ if (bch2_err_matches(ret, ENOENT))
+ return false;
+ if (ret)
+ return ret;
+
+ if (a.tree != s.tree)
+ return false;
+ }
+
+ return true;
}
/*
@@ -451,14 +551,15 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
*/
static int snapshot_tree_ptr_repair(struct btree_trans *trans,
struct btree_iter *iter,
- struct bkey_s_c_snapshot *s)
+ struct bkey_s_c k,
+ struct bch_snapshot *s)
{
struct bch_fs *c = trans->c;
struct btree_iter root_iter;
struct bch_snapshot_tree s_t;
struct bkey_s_c_snapshot root;
struct bkey_i_snapshot *u;
- u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id;
+ u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
int ret;
root = bch2_bkey_get_iter_typed(trans, &root_iter,
@@ -484,18 +585,18 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
goto err;
u->v.tree = cpu_to_le32(tree_id);
- if (s->k->p.snapshot == root_id)
- *s = snapshot_i_to_s_c(u);
+ if (k.k->p.offset == root_id)
+ *s = u->v;
}
- if (s->k->p.snapshot != root_id) {
- u = bch2_bkey_make_mut_typed(trans, iter, &s->s_c, 0, snapshot);
+ if (k.k->p.offset != root_id) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
u->v.tree = cpu_to_le32(tree_id);
- *s = snapshot_i_to_s_c(u);
+ *s = u->v;
}
err:
bch2_trans_iter_exit(trans, &root_iter);
@@ -507,9 +608,14 @@ static int check_snapshot(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c_snapshot s;
+ struct bch_snapshot s;
struct bch_subvolume subvol;
struct bch_snapshot v;
+ struct bkey_i_snapshot *u;
+ u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
+ struct snapshot_t *parent = parent_id
+ ? snapshot_t(c, parent_id)
+ : NULL;
struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id;
@@ -518,94 +624,119 @@ static int check_snapshot(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_snapshot)
return 0;
- s = bkey_s_c_to_snapshot(k);
- id = le32_to_cpu(s.v->parent);
+ memset(&s, 0, sizeof(s));
+ memcpy(&s, k.v, bkey_val_bytes(k.k));
+
+ id = le32_to_cpu(s.parent);
if (id) {
ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot with nonexistent parent:\n %s",
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret)
goto err;
- if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
- le32_to_cpu(v.children[1]) != s.k->p.offset) {
+ if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
+ le32_to_cpu(v.children[1]) != k.k->p.offset) {
bch_err(c, "snapshot parent %u missing pointer to child %llu",
- id, s.k->p.offset);
+ id, k.k->p.offset);
ret = -EINVAL;
goto err;
}
}
- for (i = 0; i < 2 && s.v->children[i]; i++) {
- id = le32_to_cpu(s.v->children[i]);
+ for (i = 0; i < 2 && s.children[i]; i++) {
+ id = le32_to_cpu(s.children[i]);
ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot node %llu has nonexistent child %u",
- s.k->p.offset, id);
+ k.k->p.offset, id);
if (ret)
goto err;
- if (le32_to_cpu(v.parent) != s.k->p.offset) {
+ if (le32_to_cpu(v.parent) != k.k->p.offset) {
bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
- id, le32_to_cpu(v.parent), s.k->p.offset);
+ id, le32_to_cpu(v.parent), k.k->p.offset);
ret = -EINVAL;
goto err;
}
}
- should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
- !BCH_SNAPSHOT_DELETED(s.v);
+ should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ !BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
- id = le32_to_cpu(s.v->subvol);
+ id = le32_to_cpu(s.subvol);
ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret)
goto err;
- if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
+ if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
- s.k->p.offset);
+ k.k->p.offset);
ret = -EINVAL;
goto err;
}
} else {
- if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s",
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
-
+ if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
- bkey_reassemble(&u->k_i, s.s_c);
u->v.subvol = 0;
- ret = bch2_trans_update(trans, iter, &u->k_i, 0);
- if (ret)
- goto err;
-
- s = snapshot_i_to_s_c(u);
+ s = u->v;
}
}
- ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree));
+ ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
if (ret < 0)
goto err;
if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s",
- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = snapshot_tree_ptr_repair(trans, iter, &s);
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
if (ret)
goto err;
}
ret = 0;
- if (BCH_SNAPSHOT_DELETED(s.v))
- set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
+ if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c,
+ "snapshot with incorrect depth fields, should be %u:\n %s",
+ parent->depth + 1,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0);
+ s = u->v;
+ }
+
+ ret = snapshot_rand_ancestor_good(trans, s);
+ if (ret < 0)
+ goto err;
+
+ if (fsck_err_on(!ret, c, "snapshot with bad rand_ancestor field:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
+ u->v.skip[i] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent_id));
+
+ bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int);
+ s = u->v;
+ }
+ ret = 0;
err:
fsck_err:
printbuf_exit(&buf);
@@ -618,9 +749,13 @@ int bch2_check_snapshots(struct bch_fs *c)
struct bkey_s_c k;
int ret;
+ /*
+ * We iterate backwards as checking/fixing the depth field requires that
+ * the parent's depth already be correct:
+ */
ret = bch2_trans_run(c,
- for_each_btree_key_commit(&trans, iter,
- BTREE_ID_snapshots, POS_MIN,
+ for_each_btree_key_reverse_commit(&trans, iter,
+ BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k)));
@@ -847,10 +982,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
u32 *snapshot_subvols,
unsigned nr_snapids)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i_snapshot *n;
struct bkey_s_c k;
- unsigned i;
+ unsigned i, j;
+ u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@@ -880,6 +1017,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
n->v.parent = cpu_to_le32(parent);
n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
n->v.tree = cpu_to_le32(tree);
+ n->v.depth = cpu_to_le32(depth);
+
+ for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
+ n->v.skip[j] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent));
+
+ bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int);
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h
index daa9a6b0..ab0b4a6d 100644
--- a/libbcachefs/subvolume.h
+++ b/libbcachefs/subvolume.h
@@ -37,9 +37,34 @@ static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
return genradix_ptr(&c->snapshots, U32_MAX - id);
}
+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+{
+ return snapshot_t(c, id)->parent;
+}
+
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ u32 parent = snapshot_t(c, id)->parent;
+
+ if (parent &&
+ snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+ panic("id %u depth=%u parent %u depth=%u\n",
+ id, snapshot_t(c, id)->depth,
+ parent, snapshot_t(c, parent)->depth);
+
+ return parent;
+#else
return snapshot_t(c, id)->parent;
+#endif
+}
+
+static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
+{
+ while (n--)
+ id = bch2_snapshot_parent(c, id);
+
+ return id;
}
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
@@ -84,13 +109,7 @@ static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
return 0;
}
-static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
-{
- while (id && id < ancestor)
- id = bch2_snapshot_parent(c, id);
-
- return id == ancestor;
-}
+bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/subvolume_types.h
index c6c1cbad..750d975a 100644
--- a/libbcachefs/subvolume_types.h
+++ b/libbcachefs/subvolume_types.h
@@ -8,6 +8,8 @@ typedef DARRAY(u32) snapshot_id_list;
struct snapshot_t {
u32 parent;
+ u32 skip[3];
+ u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 1ac36060..1b5bc4eb 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -4,6 +4,7 @@
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
+#include "counters.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
@@ -12,24 +13,29 @@
#include "journal_io.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
+#include "recovery.h"
#include "replicas.h"
#include "quota.h"
#include "super-io.h"
#include "super.h"
#include "trace.h"
#include "vstructs.h"
-#include "counters.h"
#include <linux/backing-dev.h>
#include <linux/sort.h>
-struct bch2_metadata_version_str {
+struct bch2_metadata_version {
u16 version;
const char *name;
+ u64 recovery_passes;
};
-static const struct bch2_metadata_version_str bch2_metadata_versions[] = {
-#define x(n, v) { .version = v, .name = #n },
+static const struct bch2_metadata_version bch2_metadata_versions[] = {
+#define x(n, v, _recovery_passes) { \
+ .version = v, \
+ .name = #n, \
+ .recovery_passes = _recovery_passes, \
+},
BCH_METADATA_VERSIONS()
#undef x
};
@@ -61,6 +67,24 @@ unsigned bch2_latest_compatible_version(unsigned v)
return v;
}
+u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
+ unsigned old_version,
+ unsigned new_version)
+{
+ u64 ret = 0;
+
+ for (const struct bch2_metadata_version *i = bch2_metadata_versions;
+ i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
+ i++)
+ if (i->version > old_version && i->version <= new_version) {
+ if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
+ ret |= bch2_fsck_recovery_passes();
+ ret |= i->recovery_passes;
+ }
+
+ return ret &= ~RECOVERY_PASS_ALL_FSCK;
+}
+
const char * const bch2_sb_fields[] = {
#define x(name, nr) #name,
BCH_SB_FIELDS()
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index b365f088..904adea6 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -18,6 +18,10 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, unsigned);
unsigned bch2_latest_compatible_version(unsigned);
+u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
+ unsigned,
+ unsigned);
+
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned);