aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-03 11:35:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-03 11:35:48 -0800
commit981d04137a4b5ea95133572bdb3d888c9b515850 (patch)
tree8bae477aeae1ec7d281b700c33a3d2db0dc30b0c
parent610a9b8f49fbcf1100716370d3b5f6f884a2835a (diff)
parent0d72ab35a925d66b044cb62b709e53141c3f0143 (diff)
downloadmisc-981d04137a4b5ea95133572bdb3d888c9b515850.tar.gz
Merge tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs from Kent Overstreet: "More bcachefs bugfixes for 6.7, and forwards compatibility work: - fix for a nasty extents + snapshot interaction, reported when reflink of a snapshotted file wouldn't complete but turned out to be a more general bug - fix for an invalid free in dio write path when iov vector was longer than our inline vector - fix for a buffer overflow in the nocow write path - BCH_REPLICAS_MAX doesn't actually limit the number of pointers in an extent when cached pointers are included - RO snapshots are actually RO now - And, a new superblock section to avoid future breakage when the disk space acounting rewrite rolls out: the new superblock section describes versions that need work to downgrade, where the work required is a list of recovery passes and errors to silently fix" * tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs: bcachefs: make RO snapshots actually RO bcachefs: bch_sb_field_downgrade bcachefs: bch_sb.recovery_passes_required bcachefs: Add persistent identifiers for recovery passes bcachefs: prt_bitflags_vector() bcachefs: move BCH_SB_ERRS() to sb-errors_types.h bcachefs: fix buffer overflow in nocow write path bcachefs: DARRAY_PREALLOCATED() bcachefs: Switch darray to kvmalloc() bcachefs: Factor out darray resize slowpath bcachefs: fix setting version_upgrade_complete bcachefs: fix invalid free in dio write path bcachefs: Fix extents iteration + snapshots interaction
-rw-r--r--fs/bcachefs/Makefile2
-rw-r--r--fs/bcachefs/acl.c3
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/bcachefs_format.h51
-rw-r--r--fs/bcachefs/btree_iter.c35
-rw-r--r--fs/bcachefs/darray.c24
-rw-r--r--fs/bcachefs/darray.h48
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c3
-rw-r--r--fs/bcachefs/fs-io-direct.c13
-rw-r--r--fs/bcachefs/fs-ioctl.c12
-rw-r--r--fs/bcachefs/fs.c38
-rw-r--r--fs/bcachefs/io_write.c82
-rw-r--r--fs/bcachefs/printbuf.c22
-rw-r--r--fs/bcachefs/printbuf.h2
-rw-r--r--fs/bcachefs/recovery.c137
-rw-r--r--fs/bcachefs/recovery.h3
-rw-r--r--fs/bcachefs/recovery_types.h86
-rw-r--r--fs/bcachefs/sb-clean.c2
-rw-r--r--fs/bcachefs/sb-downgrade.c188
-rw-r--r--fs/bcachefs/sb-downgrade.h10
-rw-r--r--fs/bcachefs/sb-errors.c6
-rw-r--r--fs/bcachefs/sb-errors.h253
-rw-r--r--fs/bcachefs/sb-errors_types.h253
-rw-r--r--fs/bcachefs/subvolume.c18
-rw-r--r--fs/bcachefs/subvolume.h3
-rw-r--r--fs/bcachefs/super-io.c86
-rw-r--r--fs/bcachefs/super-io.h12
-rw-r--r--fs/bcachefs/util.h1
-rw-r--r--fs/bcachefs/xattr.c3
30 files changed, 977 insertions, 423 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 45b64f89258c2e..b8126841817448 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -28,6 +28,7 @@ bcachefs-y := \
clock.o \
compress.o \
counters.o \
+ darray.o \
debug.o \
dirent.o \
disk_groups.o \
@@ -70,6 +71,7 @@ bcachefs-y := \
reflink.o \
replicas.o \
sb-clean.o \
+ sb-downgrade.o \
sb-errors.o \
sb-members.o \
siphash.o \
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index f3809897f00a7d..3640f417cce118 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -366,7 +366,8 @@ retry:
bch2_trans_begin(trans);
acl = _acl;
- ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
+ ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
+ bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index dfa22f9d9a1d34..b62737fdf5abce 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -737,6 +737,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
+ unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
} sb;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 1ab1f08d763b02..fe78e87603fcf3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1207,19 +1207,21 @@ struct bch_sb_field {
};
#define BCH_SB_FIELDS() \
- x(journal, 0) \
- x(members_v1, 1) \
- x(crypt, 2) \
- x(replicas_v0, 3) \
- x(quota, 4) \
- x(disk_groups, 5) \
- x(clean, 6) \
- x(replicas, 7) \
- x(journal_seq_blacklist, 8) \
- x(journal_v2, 9) \
- x(counters, 10) \
- x(members_v2, 11) \
- x(errors, 12)
+ x(journal, 0) \
+ x(members_v1, 1) \
+ x(crypt, 2) \
+ x(replicas_v0, 3) \
+ x(quota, 4) \
+ x(disk_groups, 5) \
+ x(clean, 6) \
+ x(replicas, 7) \
+ x(journal_seq_blacklist, 8) \
+ x(journal_v2, 9) \
+ x(counters, 10) \
+ x(members_v2, 11) \
+ x(errors, 12) \
+ x(ext, 13) \
+ x(downgrade, 14)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -1631,6 +1633,24 @@ struct bch_sb_field_errors {
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
+struct bch_sb_field_ext {
+ struct bch_sb_field field;
+ __le64 recovery_passes_required[2];
+ __le64 errors_silent[8];
+};
+
+struct bch_sb_field_downgrade_entry {
+ __le16 version;
+ __le64 recovery_passes[2];
+ __le16 nr_errors;
+ __le16 errors[] __counted_by(nr_errors);
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {
+ struct bch_sb_field field;
+ struct bch_sb_field_downgrade_entry entries[];
+};
+
/* Superblock: */
/*
@@ -1644,6 +1664,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
+/*
+ * field 1: version name
+ * field 2: BCH_VERSION(major, minor)
+ * field 3: recovery passess required on upgrade
+ */
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \
RECOVERY_PASS_ALL_FSCK) \
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 6be79129738d0f..da594e0067697c 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
goto out_no_locked;
/*
- * iter->pos should be mononotically increasing, and always be
- * equal to the key we just returned - except extents can
- * straddle iter->pos:
+ * We need to check against @end before FILTER_SNAPSHOTS because
+ * if we get to a different inode that requested we might be
+ * seeing keys for a different snapshot tree that will all be
+ * filtered out.
+ *
+ * But we can't do the full check here, because bkey_start_pos()
+ * isn't monotonically increasing before FILTER_SNAPSHOTS, and
+ * that's what we check against in extents mode:
*/
- if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
- iter_pos = k.k->p;
- else
- iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
-
- if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
- ? bkey_gt(iter_pos, end)
- : bkey_ge(iter_pos, end)))
+ if (k.k->p.inode > end.inode)
goto end;
if (iter->update_path &&
@@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
continue;
}
+ /*
+ * iter->pos should be mononotically increasing, and always be
+ * equal to the key we just returned - except extents can
+ * straddle iter->pos:
+ */
+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+ iter_pos = k.k->p;
+ else
+ iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
+
+ if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? bkey_gt(iter_pos, end)
+ : bkey_ge(iter_pos, end)))
+ goto end;
+
break;
}
diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
new file mode 100644
index 00000000000000..ac35b8b705ae1c
--- /dev/null
+++ b/fs/bcachefs/darray.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include "darray.h"
+
+int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
+{
+ if (new_size > d->size) {
+ new_size = roundup_pow_of_two(new_size);
+
+ void *data = kvmalloc_array(new_size, element_size, gfp);
+ if (!data)
+ return -ENOMEM;
+
+ memcpy(data, d->data, d->size * element_size);
+ if (d->data != d->preallocated)
+ kvfree(d->data);
+ d->data = data;
+ d->size = new_size;
+ }
+
+ return 0;
+}
diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h
index 87b4b2d1ec766f..e367c625f057c2 100644
--- a/fs/bcachefs/darray.h
+++ b/fs/bcachefs/darray.h
@@ -8,39 +8,48 @@
* Inspired by CCAN's darray
*/
-#include "util.h"
#include <linux/slab.h>
-#define DARRAY(type) \
+#define DARRAY_PREALLOCATED(_type, _nr) \
struct { \
size_t nr, size; \
- type *data; \
+ _type *data; \
+ _type preallocated[_nr]; \
}
-typedef DARRAY(void) darray_void;
+#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
-static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
+typedef DARRAY(char) darray_char;
+
+int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
+
+static inline int __darray_resize(darray_char *d, size_t element_size,
+ size_t new_size, gfp_t gfp)
{
- if (d->nr + more > d->size) {
- size_t new_size = roundup_pow_of_two(d->nr + more);
- void *data = krealloc_array(d->data, new_size, t_size, gfp);
+ return unlikely(new_size > d->size)
+ ? __bch2_darray_resize(d, element_size, new_size, gfp)
+ : 0;
+}
- if (!data)
- return -ENOMEM;
+#define darray_resize_gfp(_d, _new_size, _gfp) \
+ unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
- d->data = data;
- d->size = new_size;
- }
+#define darray_resize(_d, _new_size) \
+ darray_resize_gfp(_d, _new_size, GFP_KERNEL)
- return 0;
+static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
+{
+ return __darray_resize(d, t_size, d->nr + more, gfp);
}
#define darray_make_room_gfp(_d, _more, _gfp) \
- __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
+ __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
#define darray_make_room(_d, _more) \
darray_make_room_gfp(_d, _more, GFP_KERNEL)
+#define darray_room(_d) ((_d).size - (_d).nr)
+
#define darray_top(_d) ((_d).data[(_d).nr])
#define darray_push_gfp(_d, _item, _gfp) \
@@ -80,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
#define darray_init(_d) \
do { \
- (_d)->data = NULL; \
- (_d)->nr = (_d)->size = 0; \
+ (_d)->nr = 0; \
+ (_d)->size = ARRAY_SIZE((_d)->preallocated); \
+ (_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
} while (0)
#define darray_exit(_d) \
do { \
- kfree((_d)->data); \
+ if (!ARRAY_SIZE((_d)->preallocated) || \
+ (_d)->data != (_d)->preallocated) \
+ kvfree((_d)->data); \
darray_init(_d); \
} while (0)
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index ae7910bf2228c4..9ce29681eec963 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -95,6 +95,7 @@
x(ENOSPC, ENOSPC_sb_members) \
x(ENOSPC, ENOSPC_sb_members_v2) \
x(ENOSPC, ENOSPC_sb_crypt) \
+ x(ENOSPC, ENOSPC_sb_downgrade) \
x(ENOSPC, ENOSPC_btree_slot) \
x(ENOSPC, ENOSPC_snapshot_tree) \
x(ENOENT, ENOENT_bkey_type_mismatch) \
@@ -218,6 +219,8 @@
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
+ x(BCH_ERR_invalid_sb, invalid_sb_ext) \
+ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 7b28d37922fd0e..25cf78a7b946b2 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c,
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
+ if (test_bit(err, c->sb.errors_silent))
+ return -BCH_ERR_fsck_fix;
+
bch2_sb_error_count(c, err);
va_start(args, fmt);
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 9a479e4de6b36a..84e20c3ada6cbb 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -216,11 +216,11 @@ struct dio_write {
struct address_space *mapping;
struct bch_inode_info *inode;
struct mm_struct *mm;
+ const struct iovec *iov;
unsigned loop:1,
extending:1,
sync:1,
- flush:1,
- free_iov:1;
+ flush:1;
struct quota_res quota_res;
u64 written;
@@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
return -1;
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
- iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
+ dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
GFP_KERNEL);
if (unlikely(!iov))
return -ENOMEM;
-
- dio->free_iov = true;
}
memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
@@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
bch2_pagecache_block_put(inode);
- if (dio->free_iov)
- kfree(dio->iter.__iov);
+ kfree(dio->iov);
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
@@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->mapping = mapping;
dio->inode = inode;
dio->mm = current->mm;
+ dio->iov = NULL;
dio->loop = false;
dio->extending = extending;
dio->sync = is_sync_kiocb(req) || extending;
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
- dio->free_iov = false;
dio->quota_res.sectors = 0;
dio->written = 0;
dio->iter = *iter;
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index a70b7a03057d7f..14d5cc6f90d7dd 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
@@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
- ret = bch2_set_projid(c, inode, fa.fsx_projid);
- if (ret)
- goto err_unlock;
-
- ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ bch2_set_projid(c, inode, fa.fsx_projid) ?:
+ bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
-err_unlock:
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index ba93e32d7708e6..49da8db1d9e9fc 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -258,7 +258,8 @@ __bch2_create(struct mnt_idmap *idmap,
retry:
bch2_trans_begin(trans);
- ret = bch2_create_trans(trans,
+ ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
+ bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
@@ -430,7 +431,9 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
lockdep_assert_held(&inode->v.i_rwsem);
- ret = __bch2_link(c, inode, dir, dentry);
+ ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ __bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
return ret;
@@ -481,7 +484,11 @@ err:
static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
{
- return __bch2_unlink(vdir, dentry, false);
+ struct bch_inode_info *dir= to_bch_ei(vdir);
+ struct bch_fs *c = dir->v.i_sb->s_fs_info;
+
+ return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ __bch2_unlink(vdir, dentry, false);
}
static int bch2_symlink(struct mnt_idmap *idmap,
@@ -562,6 +569,11 @@ static int bch2_rename2(struct mnt_idmap *idmap,
src_inode,
dst_inode);
+ ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
+ bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
+ if (ret)
+ goto err;
+
if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
ret = bch2_fs_quota_transfer(c, src_inode,
dst_dir->ei_qid,
@@ -783,11 +795,13 @@ static int bch2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret;
lockdep_assert_held(&inode->v.i_rwsem);
- ret = setattr_prepare(idmap, dentry, iattr);
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ setattr_prepare(idmap, dentry, iattr);
if (ret)
return ret;
@@ -1010,12 +1024,26 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
return bch2_err_class(ret);
}
+static int bch2_open(struct inode *vinode, struct file *file)
+{
+ if (file->f_flags & (O_WRONLY|O_RDWR)) {
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
+ if (ret)
+ return ret;
+ }
+
+ return generic_file_open(vinode, file);
+}
+
static const struct file_operations bch_file_operations = {
+ .open = bch2_open,
.llseek = bch2_llseek,
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
- .open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 8ede46b1e35463..8c8cb1541ac924 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done)
bch2_write_done(cl);
}
+struct bucket_to_lock {
+ struct bpos b;
+ unsigned gen;
+ struct nocow_lock_bucket *l;
+};
+
static void bch2_nocow_write(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
@@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr;
- struct {
- struct bpos b;
- unsigned gen;
- struct nocow_lock_bucket *l;
- } buckets[BCH_REPLICAS_MAX];
- unsigned nr_buckets = 0;
+ DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
+ struct bucket_to_lock *i;
u32 snapshot;
- int ret, i;
+ struct bucket_to_lock *stale_at;
+ int ret;
if (op->flags & BCH_WRITE_MOVE)
return;
+ darray_init(&buckets);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
@@ -1250,7 +1254,7 @@ retry:
while (1) {
struct bio *bio = &op->wbio.bio;
- nr_buckets = 0;
+ buckets.nr = 0;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
@@ -1263,26 +1267,26 @@ retry:
break;
if (bch2_keylist_realloc(&op->insert_keys,
- op->inline_keys,
- ARRAY_SIZE(op->inline_keys),
- k.k->u64s))
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ k.k->u64s))
break;
/* Get iorefs before dropping btree locks: */
ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
- buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
- buckets[nr_buckets].gen = ptr->gen;
- buckets[nr_buckets].l =
- bucket_nocow_lock(&c->nocow_locks,
- bucket_to_u64(buckets[nr_buckets].b));
-
- prefetch(buckets[nr_buckets].l);
+ struct bpos b = PTR_BUCKET_POS(c, ptr);
+ struct nocow_lock_bucket *l =
+ bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
+ prefetch(l);
if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
goto err_get_ioref;
- nr_buckets++;
+ /* XXX allocating memory with btree locks held - rare */
+ darray_push_gfp(&buckets, ((struct bucket_to_lock) {
+ .b = b, .gen = ptr->gen, .l = l,
+ }), GFP_KERNEL|__GFP_NOFAIL);
if (ptr->unwritten)
op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
@@ -1296,21 +1300,21 @@ retry:
if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
- for (i = 0; i < nr_buckets; i++) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
- struct nocow_lock_bucket *l = buckets[i].l;
- bool stale;
+ darray_for_each(buckets, i) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
- __bch2_bucket_nocow_lock(&c->nocow_locks, l,
- bucket_to_u64(buckets[i].b),
+ __bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
+ bucket_to_u64(i->b),
BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock();
- stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
+ bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
rcu_read_unlock();
- if (unlikely(stale))
+ if (unlikely(stale)) {
+ stale_at = i;
goto err_bucket_stale;
+ }
}
bio = &op->wbio.bio;
@@ -1346,15 +1350,14 @@ err:
if (ret) {
bch_err_inum_offset_ratelimited(c,
- op->pos.inode,
- op->pos.offset << 9,
- "%s: btree lookup error %s",
- __func__, bch2_err_str(ret));
+ op->pos.inode, op->pos.offset << 9,
+ "%s: btree lookup error %s", __func__, bch2_err_str(ret));
op->error = ret;
op->flags |= BCH_WRITE_DONE;
}
bch2_trans_put(trans);
+ darray_exit(&buckets);
/* fallback to cow write path? */
if (!(op->flags & BCH_WRITE_DONE)) {
@@ -1374,24 +1377,21 @@ err:
}
return;
err_get_ioref:
- for (i = 0; i < nr_buckets; i++)
- percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+ darray_for_each(buckets, i)
+ percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
/* Fall back to COW path: */
goto out;
err_bucket_stale:
- while (i >= 0) {
- bch2_bucket_nocow_unlock(&c->nocow_locks,
- buckets[i].b,
- BUCKET_NOCOW_LOCK_UPDATE);
- --i;
+ darray_for_each(buckets, i) {
+ bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
+ if (i == stale_at)
+ break;
}
- for (i = 0; i < nr_buckets; i++)
- percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
/* We can retry this: */
ret = -BCH_ERR_transaction_restart;
- goto out;
+ goto err_get_ioref;
}
static void __bch2_write(struct bch_write_op *op)
diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c
index 5e653eb81d54f8..accf246c323309 100644
--- a/fs/bcachefs/printbuf.c
+++ b/fs/bcachefs/printbuf.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: LGPL-2.1+
/* Copyright (C) 2022 Kent Overstreet */
+#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/kernel.h>
@@ -423,3 +424,24 @@ void bch2_prt_bitflags(struct printbuf *out,
flags ^= BIT_ULL(bit);
}
}
+
+void bch2_prt_bitflags_vector(struct printbuf *out,
+ const char * const list[],
+ unsigned long *v, unsigned nr)
+{
+ bool first = true;
+ unsigned i;
+
+ for (i = 0; i < nr; i++)
+ if (!list[i]) {
+ nr = i - 1;
+ break;
+ }
+
+ for_each_set_bit(i, v, nr) {
+ if (!first)
+ bch2_prt_printf(out, ",");
+ first = false;
+ bch2_prt_printf(out, "%s", list[i]);
+ }
+}
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index 2191423d9f2289..9a4a56c4093715 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -124,6 +124,8 @@ void bch2_prt_units_u64(struct printbuf *, u64);
void bch2_prt_units_s64(struct printbuf *, s64);
void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
+void bch2_prt_bitflags_vector(struct printbuf *, const char * const[],
+ unsigned long *, unsigned);
/* Initializer for a heap allocated printbuf: */
#define PRINTBUF ((struct printbuf) { .heap_allocated = true })
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index c7d9074c82d97b..5cf7d053200279 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -27,6 +27,7 @@
#include "recovery.h"
#include "replicas.h"
#include "sb-clean.h"
+#include "sb-downgrade.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super-io.h"
@@ -481,7 +482,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
}
const char * const bch2_recovery_passes[] = {
-#define x(_fn, _when) #_fn,
+#define x(_fn, ...) #_fn,
BCH_RECOVERY_PASSES()
#undef x
NULL
@@ -504,18 +505,47 @@ struct recovery_pass_fn {
};
static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when },
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
BCH_RECOVERY_PASSES()
#undef x
};
-static void check_version_upgrade(struct bch_fs *c)
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
unsigned latest_version = bcachefs_metadata_version_current;
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
- u64 recovery_passes;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@@ -559,7 +589,7 @@ static void check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version);
prt_newline(&buf);
- recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
+ u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
if (recovery_passes) {
if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
prt_str(&buf, "fsck required");
@@ -574,12 +604,13 @@ static void check_version_upgrade(struct bch_fs *c)
bch_info(c, "%s", buf.buf);
- mutex_lock(&c->sb_lock);
bch2_sb_upgrade(c, new_version);
- mutex_unlock(&c->sb_lock);
printbuf_exit(&buf);
+ return true;
}
+
+ return false;
}
u64 bch2_fsck_recovery_passes(void)
@@ -654,7 +685,6 @@ int bch2_fs_recovery(struct bch_fs *c)
struct bch_sb_field_clean *clean = NULL;
struct jset *last_journal_entry = NULL;
u64 last_seq = 0, blacklist_seq, journal_seq;
- bool write_sb = false;
int ret = 0;
if (c->sb.clean) {
@@ -682,15 +712,73 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
- check_version_upgrade(c);
-
if (c->opts.fsck && c->opts.norecovery) {
bch_err(c, "cannot select both norecovery and fsck");
ret = -EINVAL;
goto err;
}
+ if (!(c->opts.nochanges && c->opts.norecovery)) {
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ struct bch_sb_field_ext *ext =
+ bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
+ if (!ext) {
+ ret = -BCH_ERR_ENOSPC_sb;
+ mutex_unlock(&c->sb_lock);
+ goto err;
+ }
+
+ if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
+ ext->recovery_passes_required[0] |=
+ cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
+ write_sb = true;
+ }
+
+ u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ if (sb_passes) {
+ struct printbuf buf = PRINTBUF;
+ prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
+ prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
+ bch_info(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ if (bch2_check_version_downgrade(c)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Version downgrade required:\n");
+
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
+ BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+ BCH_VERSION_MINOR(c->sb.version));
+ passes = ext->recovery_passes_required[0] & ~passes;
+ if (passes) {
+ prt_str(&buf, " running recovery passes: ");
+ prt_bitflags(&buf, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
+
+ bch_info(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ write_sb = true;
+ }
+
+ if (check_version_upgrade(c))
+ write_sb = true;
+
+ if (write_sb)
+ bch2_write_super(c);
+
+ c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+
ret = bch2_blacklist_table_initialize(c);
if (ret) {
bch_err(c, "error initializing blacklist table");
@@ -827,11 +915,6 @@ use_clean:
if (ret)
goto err;
- if (c->opts.fsck &&
- (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
- BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)))
- c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
-
ret = bch2_run_recovery_passes(c);
if (ret)
goto err;
@@ -868,16 +951,30 @@ use_clean:
}
mutex_lock(&c->sb_lock);
- if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) {
- SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version);
+ bool write_sb = false;
+
+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
write_sb = true;
}
- if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+ if (!test_bit(BCH_FS_ERROR, &c->flags) &&
+ !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) {
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
write_sb = true;
}
+ if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ if (ext &&
+ (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
+ !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
+ memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
+ memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+ write_sb = true;
+ }
+ }
+
if (c->opts.fsck &&
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
@@ -947,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
- bch2_sb_maybe_downgrade(c);
+ bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
bch2_sb_upgrade(c, bcachefs_metadata_version_current);
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index d266aae90200e5..3a554b0751d014 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -4,6 +4,9 @@
extern const char * const bch2_recovery_passes[];
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
/*
* For when we need to rewind recovery passes and run a pass we skipped:
*/
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
index 515e3d62c2ac9e..d37c6fd30e3849 100644
--- a/fs/bcachefs/recovery_types.h
+++ b/fs/bcachefs/recovery_types.h
@@ -7,45 +7,57 @@
#define PASS_UNCLEAN BIT(2)
#define PASS_ALWAYS BIT(3)
-#define BCH_RECOVERY_PASSES() \
- x(alloc_read, PASS_ALWAYS) \
- x(stripes_read, PASS_ALWAYS) \
- x(initialize_subvolumes, 0) \
- x(snapshots_read, PASS_ALWAYS) \
- x(check_topology, 0) \
- x(check_allocations, PASS_FSCK) \
- x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \
- x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \
- x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
- x(journal_replay, PASS_ALWAYS) \
- x(check_alloc_info, PASS_FSCK) \
- x(check_lrus, PASS_FSCK) \
- x(check_btree_backpointers, PASS_FSCK) \
- x(check_backpointers_to_extents,PASS_FSCK) \
- x(check_extents_to_backpointers,PASS_FSCK) \
- x(check_alloc_to_lru_refs, PASS_FSCK) \
- x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
- x(bucket_gens_init, 0) \
- x(check_snapshot_trees, PASS_FSCK) \
- x(check_snapshots, PASS_FSCK) \
- x(check_subvols, PASS_FSCK) \
- x(delete_dead_snapshots, PASS_FSCK) \
- x(fs_upgrade_for_subvolumes, 0) \
- x(resume_logged_ops, PASS_ALWAYS) \
- x(check_inodes, PASS_FSCK) \
- x(check_extents, PASS_FSCK) \
- x(check_indirect_extents, PASS_FSCK) \
- x(check_dirents, PASS_FSCK) \
- x(check_xattrs, PASS_FSCK) \
- x(check_root, PASS_FSCK) \
- x(check_directory_structure, PASS_FSCK) \
- x(check_nlinks, PASS_FSCK) \
- x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \
- x(fix_reflink_p, 0) \
- x(set_fs_needs_rebalance, 0) \
+/*
+ * Passes may be reordered, but the second field is a persistent identifier and
+ * must never change:
+ */
+#define BCH_RECOVERY_PASSES() \
+ x(alloc_read, 0, PASS_ALWAYS) \
+ x(stripes_read, 1, PASS_ALWAYS) \
+ x(initialize_subvolumes, 2, 0) \
+ x(snapshots_read, 3, PASS_ALWAYS) \
+ x(check_topology, 4, 0) \
+ x(check_allocations, 5, PASS_FSCK) \
+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
+ x(journal_replay, 9, PASS_ALWAYS) \
+ x(check_alloc_info, 10, PASS_FSCK) \
+ x(check_lrus, 11, PASS_FSCK) \
+ x(check_btree_backpointers, 12, PASS_FSCK) \
+ x(check_backpointers_to_extents, 13, PASS_FSCK) \
+ x(check_extents_to_backpointers, 14, PASS_FSCK) \
+ x(check_alloc_to_lru_refs, 15, PASS_FSCK) \
+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
+ x(bucket_gens_init, 17, 0) \
+ x(check_snapshot_trees, 18, PASS_FSCK) \
+ x(check_snapshots, 19, PASS_FSCK) \
+ x(check_subvols, 20, PASS_FSCK) \
+ x(delete_dead_snapshots, 21, PASS_FSCK) \
+ x(fs_upgrade_for_subvolumes, 22, 0) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
+ x(check_inodes, 24, PASS_FSCK) \
+ x(check_extents, 25, PASS_FSCK) \
+ x(check_indirect_extents, 26, PASS_FSCK) \
+ x(check_dirents, 27, PASS_FSCK) \
+ x(check_xattrs, 28, PASS_FSCK) \
+ x(check_root, 29, PASS_FSCK) \
+ x(check_directory_structure, 30, PASS_FSCK) \
+ x(check_nlinks, 31, PASS_FSCK) \
+ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
+ x(fix_reflink_p, 33, 0) \
+ x(set_fs_needs_rebalance, 34, 0) \
+/* We normally enumerate recovery passes in the order we run them: */
enum bch_recovery_pass {
-#define x(n, when) BCH_RECOVERY_PASS_##n,
+#define x(n, id, when) BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+/* But we also need stable identifiers that can be used in the superblock */
+enum bch_recovery_pass_stable {
+#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
BCH_RECOVERY_PASSES()
#undef x
};
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index e151ada1c8bd2d..c76ad8ea5e4a51 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
- bch2_sb_maybe_downgrade(c);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
ret = bch2_write_super(c);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
new file mode 100644
index 00000000000000..4919237bbe7361
--- /dev/null
+++ b/fs/bcachefs/sb-downgrade.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Superblock section that contains a list of recovery passes to run when
+ * downgrading past a given version
+ */
+
+#include "bcachefs.h"
+#include "darray.h"
+#include "recovery.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+/*
+ * Downgrade table:
+ * When dowgrading past certain versions, we need to run certain recovery passes
+ * and fix certain errors:
+ *
+ * x(version, recovery_passes, errors...)
+ */
+
+#define DOWNGRADE_TABLE()
+
+struct downgrade_entry {
+ u64 recovery_passes;
+ u16 version;
+ u16 nr_errors;
+ const u16 *errors;
+};
+
+#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ };
+DOWNGRADE_TABLE()
+#undef x
+
+static const struct downgrade_entry downgrade_table[] = {
+#define x(ver, passes, ...) { \
+ .recovery_passes = passes, \
+ .version = bcachefs_metadata_version_##ver,\
+ .nr_errors = ARRAY_SIZE(ver_##errors), \
+ .errors = ver_##errors, \
+},
+DOWNGRADE_TABLE()
+#undef x
+};
+
+static inline const struct bch_sb_field_downgrade_entry *
+downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
+{
+ return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
+}
+
+#define for_each_downgrade_entry(_d, _i) \
+ for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \
+ (void *) _i < vstruct_end(&(_d)->field) && \
+ (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \
+ _i = downgrade_entry_next_c(_i))
+
+static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+ for_each_downgrade_entry(e, i) {
+ if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
+ BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
+ prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
+ BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
+ BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
+ return -BCH_ERR_invalid_sb_downgrade;
+ }
+ }
+
+ return 0;
+}
+
+static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+ if (out->nr_tabstops <= 1)
+ printbuf_tabstop_push(out, 16);
+
+ for_each_downgrade_entry(e, i) {
+ prt_str(out, "version:");
+ prt_tab(out);
+ bch2_version_to_text(out, le16_to_cpu(i->version));
+ prt_newline(out);
+
+ prt_str(out, "recovery passes:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
+ prt_newline(out);
+
+ prt_str(out, "errors:");
+ prt_tab(out);
+ bool first = true;
+ for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+ if (!first)
+ prt_char(out, ',');
+ first = false;
+ unsigned e = le16_to_cpu(i->errors[j]);
+ prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+ }
+ prt_newline(out);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
+ .validate = bch2_sb_downgrade_validate,
+ .to_text = bch2_sb_downgrade_to_text,
+};
+
+int bch2_sb_downgrade_update(struct bch_fs *c)
+{
+ darray_char table = {};
+ int ret = 0;
+
+ for (const struct downgrade_entry *src = downgrade_table;
+ src < downgrade_table + ARRAY_SIZE(downgrade_table);
+ src++) {
+ if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+ continue;
+
+ struct bch_sb_field_downgrade_entry *dst;
+ unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
+
+ ret = darray_make_room(&table, bytes);
+ if (ret)
+ goto out;
+
+ dst = (void *) &darray_top(table);
+ dst->version = cpu_to_le16(src->version);
+ dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
+ dst->recovery_passes[1] = 0;
+ dst->nr_errors = cpu_to_le16(src->nr_errors);
+ for (unsigned i = 0; i < src->nr_errors; i++)
+ dst->errors[i] = cpu_to_le16(src->errors[i]);
+
+ table.nr += bytes;
+ }
+
+ struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+
+ unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
+
+ if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
+ goto out;
+
+ d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
+ if (!d) {
+ ret = -BCH_ERR_ENOSPC_sb_downgrade;
+ goto out;
+ }
+
+ memcpy(d->entries, table.data, table.nr);
+ memset_u64s_tail(d->entries, 0, table.nr);
+out:
+ darray_exit(&table);
+ return ret;
+}
+
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+{
+ struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+ if (!d)
+ return;
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ for_each_downgrade_entry(d, i) {
+ unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
+ if (new_minor < minor && minor <= old_minor) {
+ ext->recovery_passes_required[0] |= i->recovery_passes[0];
+ ext->recovery_passes_required[1] |= i->recovery_passes[1];
+
+ for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+ unsigned e = le16_to_cpu(i->errors[j]);
+ if (e < BCH_SB_ERR_MAX)
+ __set_bit(e, c->sb.errors_silent);
+ if (e < sizeof(ext->errors_silent) * 8)
+ ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
+ }
+ }
+ }
+}
diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h
new file mode 100644
index 00000000000000..bc48fd2ca70ec1
--- /dev/null
+++ b/fs/bcachefs/sb-downgrade.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_H
+#define _BCACHEFS_SB_DOWNGRADE_H
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
+
+int bch2_sb_downgrade_update(struct bch_fs *);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_H */
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index f0930ab7f036eb..5f5bcae391fb9f 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -4,7 +4,7 @@
#include "sb-errors.h"
#include "super-io.h"
-static const char * const bch2_sb_error_strs[] = {
+const char * const bch2_sb_error_strs[] = {
#define x(t, n, ...) [n] = #t,
BCH_SB_ERRS()
NULL
@@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
{
- return e
- ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
- : 0;
+ return bch2_sb_field_nr_entries(e);
}
static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h
index 7557fe94f06de2..8889001e7db4bd 100644
--- a/fs/bcachefs/sb-errors.h
+++ b/fs/bcachefs/sb-errors.h
@@ -4,258 +4,7 @@
#include "sb-errors_types.h"
-#define BCH_SB_ERRS() \
- x(clean_but_journal_not_empty, 0) \
- x(dirty_but_no_journal_entries, 1) \
- x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
- x(sb_clean_journal_seq_mismatch, 3) \
- x(sb_clean_btree_root_mismatch, 4) \
- x(sb_clean_missing, 5) \
- x(jset_unsupported_version, 6) \
- x(jset_unknown_csum, 7) \
- x(jset_last_seq_newer_than_seq, 8) \
- x(jset_past_bucket_end, 9) \
- x(jset_seq_blacklisted, 10) \
- x(journal_entries_missing, 11) \
- x(journal_entry_replicas_not_marked, 12) \
- x(journal_entry_past_jset_end, 13) \
- x(journal_entry_replicas_data_mismatch, 14) \
- x(journal_entry_bkey_u64s_0, 15) \
- x(journal_entry_bkey_past_end, 16) \
- x(journal_entry_bkey_bad_format, 17) \
- x(journal_entry_bkey_invalid, 18) \
- x(journal_entry_btree_root_bad_size, 19) \
- x(journal_entry_blacklist_bad_size, 20) \
- x(journal_entry_blacklist_v2_bad_size, 21) \
- x(journal_entry_blacklist_v2_start_past_end, 22) \
- x(journal_entry_usage_bad_size, 23) \
- x(journal_entry_data_usage_bad_size, 24) \
- x(journal_entry_clock_bad_size, 25) \
- x(journal_entry_clock_bad_rw, 26) \
- x(journal_entry_dev_usage_bad_size, 27) \
- x(journal_entry_dev_usage_bad_dev, 28) \
- x(journal_entry_dev_usage_bad_pad, 29) \
- x(btree_node_unreadable, 30) \
- x(btree_node_fault_injected, 31) \
- x(btree_node_bad_magic, 32) \
- x(btree_node_bad_seq, 33) \
- x(btree_node_unsupported_version, 34) \
- x(btree_node_bset_older_than_sb_min, 35) \
- x(btree_node_bset_newer_than_sb, 36) \
- x(btree_node_data_missing, 37) \
- x(btree_node_bset_after_end, 38) \
- x(btree_node_replicas_sectors_written_mismatch, 39) \
- x(btree_node_replicas_data_mismatch, 40) \
- x(bset_unknown_csum, 41) \
- x(bset_bad_csum, 42) \
- x(bset_past_end_of_btree_node, 43) \
- x(bset_wrong_sector_offset, 44) \
- x(bset_empty, 45) \
- x(bset_bad_seq, 46) \
- x(bset_blacklisted_journal_seq, 47) \
- x(first_bset_blacklisted_journal_seq, 48) \
- x(btree_node_bad_btree, 49) \
- x(btree_node_bad_level, 50) \
- x(btree_node_bad_min_key, 51) \
- x(btree_node_bad_max_key, 52) \
- x(btree_node_bad_format, 53) \
- x(btree_node_bkey_past_bset_end, 54) \
- x(btree_node_bkey_bad_format, 55) \
- x(btree_node_bad_bkey, 56) \
- x(btree_node_bkey_out_of_order, 57) \
- x(btree_root_bkey_invalid, 58) \
- x(btree_root_read_error, 59) \
- x(btree_root_bad_min_key, 60) \
- x(btree_root_bad_max_key, 61) \
- x(btree_node_read_error, 62) \
- x(btree_node_topology_bad_min_key, 63) \
- x(btree_node_topology_bad_max_key, 64) \
- x(btree_node_topology_overwritten_by_prev_node, 65) \
- x(btree_node_topology_overwritten_by_next_node, 66) \
- x(btree_node_topology_interior_node_empty, 67) \
- x(fs_usage_hidden_wrong, 68) \
- x(fs_usage_btree_wrong, 69) \
- x(fs_usage_data_wrong, 70) \
- x(fs_usage_cached_wrong, 71) \
- x(fs_usage_reserved_wrong, 72) \
- x(fs_usage_persistent_reserved_wrong, 73) \
- x(fs_usage_nr_inodes_wrong, 74) \
- x(fs_usage_replicas_wrong, 75) \
- x(dev_usage_buckets_wrong, 76) \
- x(dev_usage_sectors_wrong, 77) \
- x(dev_usage_fragmented_wrong, 78) \
- x(dev_usage_buckets_ec_wrong, 79) \
- x(bkey_version_in_future, 80) \
- x(bkey_u64s_too_small, 81) \
- x(bkey_invalid_type_for_btree, 82) \
- x(bkey_extent_size_zero, 83) \
- x(bkey_extent_size_greater_than_offset, 84) \
- x(bkey_size_nonzero, 85) \
- x(bkey_snapshot_nonzero, 86) \
- x(bkey_snapshot_zero, 87) \
- x(bkey_at_pos_max, 88) \
- x(bkey_before_start_of_btree_node, 89) \
- x(bkey_after_end_of_btree_node, 90) \
- x(bkey_val_size_nonzero, 91) \
- x(bkey_val_size_too_small, 92) \
- x(alloc_v1_val_size_bad, 93) \
- x(alloc_v2_unpack_error, 94) \
- x(alloc_v3_unpack_error, 95) \
- x(alloc_v4_val_size_bad, 96) \
- x(alloc_v4_backpointers_start_bad, 97) \
- x(alloc_key_data_type_bad, 98) \
- x(alloc_key_empty_but_have_data, 99) \
- x(alloc_key_dirty_sectors_0, 100) \
- x(alloc_key_data_type_inconsistency, 101) \
- x(alloc_key_to_missing_dev_bucket, 102) \
- x(alloc_key_cached_inconsistency, 103) \
- x(alloc_key_cached_but_read_time_zero, 104) \
- x(alloc_key_to_missing_lru_entry, 105) \
- x(alloc_key_data_type_wrong, 106) \
- x(alloc_key_gen_wrong, 107) \
- x(alloc_key_dirty_sectors_wrong, 108) \
- x(alloc_key_cached_sectors_wrong, 109) \
- x(alloc_key_stripe_wrong, 110) \
- x(alloc_key_stripe_redundancy_wrong, 111) \
- x(bucket_sector_count_overflow, 112) \
- x(bucket_metadata_type_mismatch, 113) \
- x(need_discard_key_wrong, 114) \
- x(freespace_key_wrong, 115) \
- x(freespace_hole_missing, 116) \
- x(bucket_gens_val_size_bad, 117) \
- x(bucket_gens_key_wrong, 118) \
- x(bucket_gens_hole_wrong, 119) \
- x(bucket_gens_to_invalid_dev, 120) \
- x(bucket_gens_to_invalid_buckets, 121) \
- x(bucket_gens_nonzero_for_invalid_buckets, 122) \
- x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
- x(need_discard_freespace_key_bad, 124) \
- x(backpointer_pos_wrong, 125) \
- x(backpointer_to_missing_device, 126) \
- x(backpointer_to_missing_alloc, 127) \
- x(backpointer_to_missing_ptr, 128) \
- x(lru_entry_at_time_0, 129) \
- x(lru_entry_to_invalid_bucket, 130) \
- x(lru_entry_bad, 131) \
- x(btree_ptr_val_too_big, 132) \
- x(btree_ptr_v2_val_too_big, 133) \
- x(btree_ptr_has_non_ptr, 134) \
- x(extent_ptrs_invalid_entry, 135) \
- x(extent_ptrs_no_ptrs, 136) \
- x(extent_ptrs_too_many_ptrs, 137) \
- x(extent_ptrs_redundant_crc, 138) \
- x(extent_ptrs_redundant_stripe, 139) \
- x(extent_ptrs_unwritten, 140) \
- x(extent_ptrs_written_and_unwritten, 141) \
- x(ptr_to_invalid_device, 142) \
- x(ptr_to_duplicate_device, 143) \
- x(ptr_after_last_bucket, 144) \
- x(ptr_before_first_bucket, 145) \
- x(ptr_spans_multiple_buckets, 146) \
- x(ptr_to_missing_backpointer, 147) \
- x(ptr_to_missing_alloc_key, 148) \
- x(ptr_to_missing_replicas_entry, 149) \
- x(ptr_to_missing_stripe, 150) \
- x(ptr_to_incorrect_stripe, 151) \
- x(ptr_gen_newer_than_bucket_gen, 152) \
- x(ptr_too_stale, 153) \
- x(stale_dirty_ptr, 154) \
- x(ptr_bucket_data_type_mismatch, 155) \
- x(ptr_cached_and_erasure_coded, 156) \
- x(ptr_crc_uncompressed_size_too_small, 157) \
- x(ptr_crc_csum_type_unknown, 158) \
- x(ptr_crc_compression_type_unknown, 159) \
- x(ptr_crc_redundant, 160) \
- x(ptr_crc_uncompressed_size_too_big, 161) \
- x(ptr_crc_nonce_mismatch, 162) \
- x(ptr_stripe_redundant, 163) \
- x(reservation_key_nr_replicas_invalid, 164) \
- x(reflink_v_refcount_wrong, 165) \
- x(reflink_p_to_missing_reflink_v, 166) \
- x(stripe_pos_bad, 167) \
- x(stripe_val_size_bad, 168) \
- x(stripe_sector_count_wrong, 169) \
- x(snapshot_tree_pos_bad, 170) \
- x(snapshot_tree_to_missing_snapshot, 171) \
- x(snapshot_tree_to_missing_subvol, 172) \
- x(snapshot_tree_to_wrong_subvol, 173) \
- x(snapshot_tree_to_snapshot_subvol, 174) \
- x(snapshot_pos_bad, 175) \
- x(snapshot_parent_bad, 176) \
- x(snapshot_children_not_normalized, 177) \
- x(snapshot_child_duplicate, 178) \
- x(snapshot_child_bad, 179) \
- x(snapshot_skiplist_not_normalized, 180) \
- x(snapshot_skiplist_bad, 181) \
- x(snapshot_should_not_have_subvol, 182) \
- x(snapshot_to_bad_snapshot_tree, 183) \
- x(snapshot_bad_depth, 184) \
- x(snapshot_bad_skiplist, 185) \
- x(subvol_pos_bad, 186) \
- x(subvol_not_master_and_not_snapshot, 187) \
- x(subvol_to_missing_root, 188) \
- x(subvol_root_wrong_bi_subvol, 189) \
- x(bkey_in_missing_snapshot, 190) \
- x(inode_pos_inode_nonzero, 191) \
- x(inode_pos_blockdev_range, 192) \
- x(inode_unpack_error, 193) \
- x(inode_str_hash_invalid, 194) \
- x(inode_v3_fields_start_bad, 195) \
- x(inode_snapshot_mismatch, 196) \
- x(inode_unlinked_but_clean, 197) \
- x(inode_unlinked_but_nlink_nonzero, 198) \
- x(inode_checksum_type_invalid, 199) \
- x(inode_compression_type_invalid, 200) \
- x(inode_subvol_root_but_not_dir, 201) \
- x(inode_i_size_dirty_but_clean, 202) \
- x(inode_i_sectors_dirty_but_clean, 203) \
- x(inode_i_sectors_wrong, 204) \
- x(inode_dir_wrong_nlink, 205) \
- x(inode_dir_multiple_links, 206) \
- x(inode_multiple_links_but_nlink_0, 207) \
- x(inode_wrong_backpointer, 208) \
- x(inode_wrong_nlink, 209) \
- x(inode_unreachable, 210) \
- x(deleted_inode_but_clean, 211) \
- x(deleted_inode_missing, 212) \
- x(deleted_inode_is_dir, 213) \
- x(deleted_inode_not_unlinked, 214) \
- x(extent_overlapping, 215) \
- x(extent_in_missing_inode, 216) \
- x(extent_in_non_reg_inode, 217) \
- x(extent_past_end_of_inode, 218) \
- x(dirent_empty_name, 219) \
- x(dirent_val_too_big, 220) \
- x(dirent_name_too_long, 221) \
- x(dirent_name_embedded_nul, 222) \
- x(dirent_name_dot_or_dotdot, 223) \
- x(dirent_name_has_slash, 224) \
- x(dirent_d_type_wrong, 225) \
- x(dirent_d_parent_subvol_wrong, 226) \
- x(dirent_in_missing_dir_inode, 227) \
- x(dirent_in_non_dir_inode, 228) \
- x(dirent_to_missing_inode, 229) \
- x(dirent_to_missing_subvol, 230) \
- x(dirent_to_itself, 231) \
- x(quota_type_invalid, 232) \
- x(xattr_val_size_too_small, 233) \
- x(xattr_val_size_too_big, 234) \
- x(xattr_invalid_type, 235) \
- x(xattr_name_invalid_chars, 236) \
- x(xattr_in_missing_inode, 237) \
- x(root_subvol_missing, 238) \
- x(root_dir_missing, 239) \
- x(root_inode_not_dir, 240) \
- x(dir_loop, 241) \
- x(hash_table_key_duplicate, 242) \
- x(hash_table_key_wrong_offset, 243)
-
-enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
- BCH_SB_ERRS()
-#undef x
- BCH_SB_ERR_MAX
-};
+extern const char * const bch2_sb_error_strs[];
extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index b1c099843a396a..3504c2d09c291c 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -4,6 +4,259 @@
#include "darray.h"
+#define BCH_SB_ERRS() \
+ x(clean_but_journal_not_empty, 0) \
+ x(dirty_but_no_journal_entries, 1) \
+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
+ x(sb_clean_journal_seq_mismatch, 3) \
+ x(sb_clean_btree_root_mismatch, 4) \
+ x(sb_clean_missing, 5) \
+ x(jset_unsupported_version, 6) \
+ x(jset_unknown_csum, 7) \
+ x(jset_last_seq_newer_than_seq, 8) \
+ x(jset_past_bucket_end, 9) \
+ x(jset_seq_blacklisted, 10) \
+ x(journal_entries_missing, 11) \
+ x(journal_entry_replicas_not_marked, 12) \
+ x(journal_entry_past_jset_end, 13) \
+ x(journal_entry_replicas_data_mismatch, 14) \
+ x(journal_entry_bkey_u64s_0, 15) \
+ x(journal_entry_bkey_past_end, 16) \
+ x(journal_entry_bkey_bad_format, 17) \
+ x(journal_entry_bkey_invalid, 18) \
+ x(journal_entry_btree_root_bad_size, 19) \
+ x(journal_entry_blacklist_bad_size, 20) \
+ x(journal_entry_blacklist_v2_bad_size, 21) \
+ x(journal_entry_blacklist_v2_start_past_end, 22) \
+ x(journal_entry_usage_bad_size, 23) \
+ x(journal_entry_data_usage_bad_size, 24) \
+ x(journal_entry_clock_bad_size, 25) \
+ x(journal_entry_clock_bad_rw, 26) \
+ x(journal_entry_dev_usage_bad_size, 27) \
+ x(journal_entry_dev_usage_bad_dev, 28) \
+ x(journal_entry_dev_usage_bad_pad, 29) \
+ x(btree_node_unreadable, 30) \
+ x(btree_node_fault_injected, 31) \
+ x(btree_node_bad_magic, 32) \
+ x(btree_node_bad_seq, 33) \
+ x(btree_node_unsupported_version, 34) \
+ x(btree_node_bset_older_than_sb_min, 35) \
+ x(btree_node_bset_newer_than_sb, 36) \
+ x(btree_node_data_missing, 37) \
+ x(btree_node_bset_after_end, 38) \
+ x(btree_node_replicas_sectors_written_mismatch, 39) \
+ x(btree_node_replicas_data_mismatch, 40) \
+ x(bset_unknown_csum, 41) \
+ x(bset_bad_csum, 42) \
+ x(bset_past_end_of_btree_node, 43) \
+ x(bset_wrong_sector_offset, 44) \
+ x(bset_empty, 45) \
+ x(bset_bad_seq, 46) \
+ x(bset_blacklisted_journal_seq, 47) \
+ x(first_bset_blacklisted_journal_seq, 48) \
+ x(btree_node_bad_btree, 49) \
+ x(btree_node_bad_level, 50) \
+ x(btree_node_bad_min_key, 51) \
+ x(btree_node_bad_max_key, 52) \
+ x(btree_node_bad_format, 53) \
+ x(btree_node_bkey_past_bset_end, 54) \
+ x(btree_node_bkey_bad_format, 55) \
+ x(btree_node_bad_bkey, 56) \
+ x(btree_node_bkey_out_of_order, 57) \
+ x(btree_root_bkey_invalid, 58) \
+ x(btree_root_read_error, 59) \
+ x(btree_root_bad_min_key, 60) \
+ x(btree_root_bad_max_key, 61) \
+ x(btree_node_read_error, 62) \
+ x(btree_node_topology_bad_min_key, 63) \
+ x(btree_node_topology_bad_max_key, 64) \
+ x(btree_node_topology_overwritten_by_prev_node, 65) \
+ x(btree_node_topology_overwritten_by_next_node, 66) \
+ x(btree_node_topology_interior_node_empty, 67) \
+ x(fs_usage_hidden_wrong, 68) \
+ x(fs_usage_btree_wrong, 69) \
+ x(fs_usage_data_wrong, 70) \
+ x(fs_usage_cached_wrong, 71) \
+ x(fs_usage_reserved_wrong, 72) \
+ x(fs_usage_persistent_reserved_wrong, 73) \
+ x(fs_usage_nr_inodes_wrong, 74) \
+ x(fs_usage_replicas_wrong, 75) \
+ x(dev_usage_buckets_wrong, 76) \
+ x(dev_usage_sectors_wrong, 77) \
+ x(dev_usage_fragmented_wrong, 78) \
+ x(dev_usage_buckets_ec_wrong, 79) \
+ x(bkey_version_in_future, 80) \
+ x(bkey_u64s_too_small, 81) \
+ x(bkey_invalid_type_for_btree, 82) \
+ x(bkey_extent_size_zero, 83) \
+ x(bkey_extent_size_greater_than_offset, 84) \
+ x(bkey_size_nonzero, 85) \
+ x(bkey_snapshot_nonzero, 86) \
+ x(bkey_snapshot_zero, 87) \
+ x(bkey_at_pos_max, 88) \
+ x(bkey_before_start_of_btree_node, 89) \
+ x(bkey_after_end_of_btree_node, 90) \
+ x(bkey_val_size_nonzero, 91) \
+ x(bkey_val_size_too_small, 92) \
+ x(alloc_v1_val_size_bad, 93) \
+ x(alloc_v2_unpack_error, 94) \
+ x(alloc_v3_unpack_error, 95) \
+ x(alloc_v4_val_size_bad, 96) \
+ x(alloc_v4_backpointers_start_bad, 97) \
+ x(alloc_key_data_type_bad, 98) \
+ x(alloc_key_empty_but_have_data, 99) \
+ x(alloc_key_dirty_sectors_0, 100) \
+ x(alloc_key_data_type_inconsistency, 101) \
+ x(alloc_key_to_missing_dev_bucket, 102) \
+ x(alloc_key_cached_inconsistency, 103) \
+ x(alloc_key_cached_but_read_time_zero, 104) \
+ x(alloc_key_to_missing_lru_entry, 105) \
+ x(alloc_key_data_type_wrong, 106) \
+ x(alloc_key_gen_wrong, 107) \
+ x(alloc_key_dirty_sectors_wrong, 108) \
+ x(alloc_key_cached_sectors_wrong, 109) \
+ x(alloc_key_stripe_wrong, 110) \
+ x(alloc_key_stripe_redundancy_wrong, 111) \
+ x(bucket_sector_count_overflow, 112) \
+ x(bucket_metadata_type_mismatch, 113) \
+ x(need_discard_key_wrong, 114) \
+ x(freespace_key_wrong, 115) \
+ x(freespace_hole_missing, 116) \
+ x(bucket_gens_val_size_bad, 117) \
+ x(bucket_gens_key_wrong, 118) \
+ x(bucket_gens_hole_wrong, 119) \
+ x(bucket_gens_to_invalid_dev, 120) \
+ x(bucket_gens_to_invalid_buckets, 121) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122) \
+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
+ x(need_discard_freespace_key_bad, 124) \
+ x(backpointer_pos_wrong, 125) \
+ x(backpointer_to_missing_device, 126) \
+ x(backpointer_to_missing_alloc, 127) \
+ x(backpointer_to_missing_ptr, 128) \
+ x(lru_entry_at_time_0, 129) \
+ x(lru_entry_to_invalid_bucket, 130) \
+ x(lru_entry_bad, 131) \
+ x(btree_ptr_val_too_big, 132) \
+ x(btree_ptr_v2_val_too_big, 133) \
+ x(btree_ptr_has_non_ptr, 134) \
+ x(extent_ptrs_invalid_entry, 135) \
+ x(extent_ptrs_no_ptrs, 136) \
+ x(extent_ptrs_too_many_ptrs, 137) \
+ x(extent_ptrs_redundant_crc, 138) \
+ x(extent_ptrs_redundant_stripe, 139) \
+ x(extent_ptrs_unwritten, 140) \
+ x(extent_ptrs_written_and_unwritten, 141) \
+ x(ptr_to_invalid_device, 142) \
+ x(ptr_to_duplicate_device, 143) \
+ x(ptr_after_last_bucket, 144) \
+ x(ptr_before_first_bucket, 145) \
+ x(ptr_spans_multiple_buckets, 146) \
+ x(ptr_to_missing_backpointer, 147) \
+ x(ptr_to_missing_alloc_key, 148) \
+ x(ptr_to_missing_replicas_entry, 149) \
+ x(ptr_to_missing_stripe, 150) \
+ x(ptr_to_incorrect_stripe, 151) \
+ x(ptr_gen_newer_than_bucket_gen, 152) \
+ x(ptr_too_stale, 153) \
+ x(stale_dirty_ptr, 154) \
+ x(ptr_bucket_data_type_mismatch, 155) \
+ x(ptr_cached_and_erasure_coded, 156) \
+ x(ptr_crc_uncompressed_size_too_small, 157) \
+ x(ptr_crc_csum_type_unknown, 158) \
+ x(ptr_crc_compression_type_unknown, 159) \
+ x(ptr_crc_redundant, 160) \
+ x(ptr_crc_uncompressed_size_too_big, 161) \
+ x(ptr_crc_nonce_mismatch, 162) \
+ x(ptr_stripe_redundant, 163) \
+ x(reservation_key_nr_replicas_invalid, 164) \
+ x(reflink_v_refcount_wrong, 165) \
+ x(reflink_p_to_missing_reflink_v, 166) \
+ x(stripe_pos_bad, 167) \
+ x(stripe_val_size_bad, 168) \
+ x(stripe_sector_count_wrong, 169) \
+ x(snapshot_tree_pos_bad, 170) \
+ x(snapshot_tree_to_missing_snapshot, 171) \
+ x(snapshot_tree_to_missing_subvol, 172) \
+ x(snapshot_tree_to_wrong_subvol, 173) \
+ x(snapshot_tree_to_snapshot_subvol, 174) \
+ x(snapshot_pos_bad, 175) \
+ x(snapshot_parent_bad, 176) \
+ x(snapshot_children_not_normalized, 177) \
+ x(snapshot_child_duplicate, 178) \
+ x(snapshot_child_bad, 179) \
+ x(snapshot_skiplist_not_normalized, 180) \
+ x(snapshot_skiplist_bad, 181) \
+ x(snapshot_should_not_have_subvol, 182) \
+ x(snapshot_to_bad_snapshot_tree, 183) \
+ x(snapshot_bad_depth, 184) \
+ x(snapshot_bad_skiplist, 185) \
+ x(subvol_pos_bad, 186) \
+ x(subvol_not_master_and_not_snapshot, 187) \
+ x(subvol_to_missing_root, 188) \
+ x(subvol_root_wrong_bi_subvol, 189) \
+ x(bkey_in_missing_snapshot, 190) \
+ x(inode_pos_inode_nonzero, 191) \
+ x(inode_pos_blockdev_range, 192) \
+ x(inode_unpack_error, 193) \
+ x(inode_str_hash_invalid, 194) \
+ x(inode_v3_fields_start_bad, 195) \
+ x(inode_snapshot_mismatch, 196) \
+ x(inode_unlinked_but_clean, 197) \
+ x(inode_unlinked_but_nlink_nonzero, 198) \
+ x(inode_checksum_type_invalid, 199) \
+ x(inode_compression_type_invalid, 200) \
+ x(inode_subvol_root_but_not_dir, 201) \
+ x(inode_i_size_dirty_but_clean, 202) \
+ x(inode_i_sectors_dirty_but_clean, 203) \
+ x(inode_i_sectors_wrong, 204) \
+ x(inode_dir_wrong_nlink, 205) \
+ x(inode_dir_multiple_links, 206) \
+ x(inode_multiple_links_but_nlink_0, 207) \
+ x(inode_wrong_backpointer, 208) \
+ x(inode_wrong_nlink, 209) \
+ x(inode_unreachable, 210) \
+ x(deleted_inode_but_clean, 211) \
+ x(deleted_inode_missing, 212) \
+ x(deleted_inode_is_dir, 213) \
+ x(deleted_inode_not_unlinked, 214) \
+ x(extent_overlapping, 215) \
+ x(extent_in_missing_inode, 216) \
+ x(extent_in_non_reg_inode, 217) \
+ x(extent_past_end_of_inode, 218) \
+ x(dirent_empty_name, 219) \
+ x(dirent_val_too_big, 220) \
+ x(dirent_name_too_long, 221) \
+ x(dirent_name_embedded_nul, 222) \
+ x(dirent_name_dot_or_dotdot, 223) \
+ x(dirent_name_has_slash, 224) \
+ x(dirent_d_type_wrong, 225) \
+ x(dirent_d_parent_subvol_wrong, 226) \
+ x(dirent_in_missing_dir_inode, 227) \
+ x(dirent_in_non_dir_inode, 228) \
+ x(dirent_to_missing_inode, 229) \
+ x(dirent_to_missing_subvol, 230) \
+ x(dirent_to_itself, 231) \
+ x(quota_type_invalid, 232) \
+ x(xattr_val_size_too_small, 233) \
+ x(xattr_val_size_too_big, 234) \
+ x(xattr_invalid_type, 235) \
+ x(xattr_name_invalid_chars, 236) \
+ x(xattr_in_missing_inode, 237) \
+ x(root_subvol_missing, 238) \
+ x(root_dir_missing, 239) \
+ x(root_inode_not_dir, 240) \
+ x(dir_loop, 241) \
+ x(hash_table_key_duplicate, 242) \
+ x(hash_table_key_wrong_offset, 243)
+
+enum bch_sb_error_id {
+#define x(t, n) BCH_FSCK_ERR_##t = n,
+ BCH_SB_ERRS()
+#undef x
+ BCH_SB_ERR_MAX
+};
+
struct bch_sb_error_entry_cpu {
u64 id:16,
nr:48;
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index fccd25aa32426a..22b34a8e4d6efe 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -146,6 +146,24 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
}
+int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
+{
+ struct bch_subvolume s;
+ int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s);
+ if (ret)
+ return ret;
+
+ if (BCH_SUBVOLUME_RO(&s))
+ return -EROFS;
+ return 0;
+}
+
+int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
+{
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_subvol_is_ro_trans(trans, subvol));
+}
+
int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
struct bch_subvolume *subvol)
{
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index a1003d30ab0a0c..a6f56f66e27cb7 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -23,6 +23,9 @@ int bch2_subvolume_get(struct btree_trans *, unsigned,
bool, int, struct bch_subvolume *);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
+int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
+int bch2_subvol_is_ro(struct bch_fs *, u32);
+
int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index f3e12f7979d5ea..4c98d8cc2a7976 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -13,6 +13,7 @@
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
+#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "super-io.h"
@@ -264,6 +265,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
return f;
}
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
+ enum bch_sb_field_type type,
+ unsigned u64s)
+{
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
+
+ if (!f || le32_to_cpu(f->u64s) < u64s)
+ f = bch2_sb_field_resize_id(sb, type, u64s);
+ return f;
+}
+
/* Superblock validate: */
static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
@@ -484,6 +496,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
/* device open: */
+static unsigned long le_ulong_to_cpu(unsigned long v)
+{
+ return sizeof(unsigned long) == 8
+ ? le64_to_cpu(v)
+ : le32_to_cpu(v);
+}
+
+static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
+{
+ BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
+
+ for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
+ dst[i] = le_ulong_to_cpu(src[i]);
+}
+
static void bch2_sb_update(struct bch_fs *c)
{
struct bch_sb *src = c->disk_sb.sb;
@@ -512,8 +539,15 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.features = le64_to_cpu(src->features[0]);
c->sb.compat = le64_to_cpu(src->compat[0]);
+ memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
+ if (ext)
+ le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
+ sizeof(c->sb.errors_silent) * 8);
+
for_each_member_device(ca, c, i) {
- struct bch_member m = bch2_sb_member_get(src, i);
+ struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
ca->mi = bch2_mi_to_cpu(&m);
}
}
@@ -906,6 +940,7 @@ int bch2_write_super(struct bch_fs *c)
bch2_sb_members_from_cpu(c);
bch2_sb_members_cpy_v2_v1(&c->disk_sb);
bch2_sb_errors_from_cpu(c);
+ bch2_sb_downgrade_update(c);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
@@ -1029,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
}
/* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
{
+ bool ret = bcachefs_metadata_version_current < c->sb.version;
+
lockdep_assert_held(&c->sb_lock);
/*
@@ -1044,16 +1081,61 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
if (c->sb.version_min > bcachefs_metadata_version_current)
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+ return ret;
}
void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
{
lockdep_assert_held(&c->sb_lock);
+ if (BCH_VERSION_MAJOR(new_version) >
+ BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+ bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
}
+static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ if (vstruct_bytes(f) < 88) {
+ prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
+ return -BCH_ERR_invalid_sb_ext;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_ext *e = field_to_type(f, ext);
+
+ prt_printf(out, "Recovery passes required:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
+ prt_newline(out);
+
+ unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
+ if (errors_silent) {
+ le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
+
+ prt_printf(out, "Errors to silently fix:");
+ prt_tab(out);
+ prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+ prt_newline(out);
+
+ kfree(errors_silent);
+ }
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
+ .validate = bch2_sb_ext_validate,
+ .to_text = bch2_sb_ext_to_text,
+};
+
static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
#define x(f, nr) \
[BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index f5abd102bff750..e41e5de531a0a2 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -40,6 +40,16 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *,
#define bch2_sb_field_resize(_sb, _name, _u64s) \
field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *,
+ enum bch_sb_field_type, unsigned);
+#define bch2_sb_field_get_minsize(_sb, _name, _u64s) \
+ field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
+
+#define bch2_sb_field_nr_entries(_f) \
+ (_f ? ((bch2_sb_field_bytes(&_f->field) - sizeof(*_f)) / \
+ sizeof(_f->entries[0])) \
+ : 0)
+
void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
extern const char * const bch2_sb_fields[];
@@ -83,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
-void bch2_sb_maybe_downgrade(struct bch_fs *);
+bool bch2_check_version_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2984b57b29584f..b701f7fe0784ef 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -243,6 +243,7 @@ do { \
#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__)
#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__)
#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
+#define prt_bitflags_vector(...) bch2_prt_bitflags_vector(__VA_ARGS__)
void bch2_pr_time_units(struct printbuf *, u64);
void bch2_prt_datetime(struct printbuf *, time64_t);
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 79d982674c1803..5a1858fb9879af 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -176,7 +176,8 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
struct btree_iter inode_iter = { NULL };
int ret;
- ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
+ ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:
+ bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
if (ret)
return ret;